From d24ea8a7336a2c392728e2cf909d607a680feb7b Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 8 Oct 2018 16:30:48 +1100 Subject: KVM: PPC: Book3S: Simplify external interrupt handling Currently we use two bits in the vcpu pending_exceptions bitmap to indicate that an external interrupt is pending for the guest, one for "one-shot" interrupts that are cleared when delivered, and one for interrupts that persist until cleared by an explicit action of the OS (e.g. an acknowledge to an interrupt controller). The BOOK3S_IRQPRIO_EXTERNAL bit is used for one-shot interrupt requests and BOOK3S_IRQPRIO_EXTERNAL_LEVEL is used for persisting interrupts. In practice BOOK3S_IRQPRIO_EXTERNAL never gets used, because our Book3S platforms generally, and pseries in particular, expect external interrupt requests to persist until they are acknowledged at the interrupt controller. That combined with the confusion introduced by having two bits for what is essentially the same thing makes it attractive to simplify things by only using one bit. This patch does that. With this patch there is only BOOK3S_IRQPRIO_EXTERNAL, and by default it has the semantics of a persisting interrupt. In order to avoid breaking the ABI, we introduce a new "external_oneshot" flag which preserves the behaviour of the KVM_INTERRUPT ioctl with the KVM_INTERRUPT_SET argument. Reviewed-by: David Gibson Signed-off-by: Paul Mackerras Signed-off-by: Michael Ellerman --- tools/perf/arch/powerpc/util/book3s_hv_exits.h | 1 - 1 file changed, 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/arch/powerpc/util/book3s_hv_exits.h b/tools/perf/arch/powerpc/util/book3s_hv_exits.h index 853b95d1e139..2011376c7ab5 100644 --- a/tools/perf/arch/powerpc/util/book3s_hv_exits.h +++ b/tools/perf/arch/powerpc/util/book3s_hv_exits.h @@ -15,7 +15,6 @@ {0x400, "INST_STORAGE"}, \ {0x480, "INST_SEGMENT"}, \ {0x500, "EXTERNAL"}, \ - {0x501, "EXTERNAL_LEVEL"}, \ {0x502, "EXTERNAL_HV"}, \ {0x600, "ALIGNMENT"}, \ {0x700, "PROGRAM"}, \ -- cgit v1.2.3 From 6c930268bcc4cef62194daf151097d2b94f67718 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 18 Sep 2018 19:54:24 +0200 Subject: kvm: selftests: vcpu_setup: set cr4.osfxsr Guest code may want to call functions that have variable arguments. To do so, we either need to compile with -mno-sse or enable SSE in the VCPUs. As it should be pretty safe to turn on the feature, and -mno-sse would make linking test code with standard libraries difficult, we choose the feature enabling. Signed-off-by: Andrew Jones Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/lib/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/lib/x86.c b/tools/testing/selftests/kvm/lib/x86.c index a3122f1949a8..bb6c65ebfa77 100644 --- a/tools/testing/selftests/kvm/lib/x86.c +++ b/tools/testing/selftests/kvm/lib/x86.c @@ -626,7 +626,7 @@ void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_memslot) switch (vm->mode) { case VM_MODE_FLAT48PG: sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG; - sregs.cr4 |= X86_CR4_PAE; + sregs.cr4 |= X86_CR4_PAE | X86_CR4_OSFXSR; sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX); kvm_seg_set_unusable(&sregs.ldt); -- cgit v1.2.3 From 14c47b7530e2db1ab1d42ebbe99b2a58b8443ce7 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 18 Sep 2018 19:54:25 +0200 Subject: kvm: selftests: introduce ucall Rework the guest exit to userspace code to generalize the concept into what it is, a "hypercall to userspace", and provide two implementations of it: the PortIO version currently used, but only useable by x86, and an MMIO version that other architectures (except s390) can use. Signed-off-by: Andrew Jones Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/Makefile | 2 +- tools/testing/selftests/kvm/cr4_cpuid_sync_test.c | 12 +- tools/testing/selftests/kvm/dirty_log_test.c | 5 +- tools/testing/selftests/kvm/include/kvm_util.h | 76 ++++++----- tools/testing/selftests/kvm/lib/kvm_util.c | 15 +-- .../testing/selftests/kvm/lib/kvm_util_internal.h | 1 + tools/testing/selftests/kvm/lib/ucall.c | 144 +++++++++++++++++++++ tools/testing/selftests/kvm/platform_info_test.c | 12 +- tools/testing/selftests/kvm/state_test.c | 23 ++-- tools/testing/selftests/kvm/vmx_tsc_adjust_test.c | 17 ++- 10 files changed, 222 insertions(+), 85 deletions(-) create mode 100644 tools/testing/selftests/kvm/lib/ucall.c (limited to 'tools') diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index ec32dad3c3f0..08426f036353 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -3,7 +3,7 @@ all: top_srcdir = ../../../../ UNAME_M := $(shell uname -m) -LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c +LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/ucall.c lib/sparsebit.c LIBKVM_x86_64 = lib/x86.c lib/vmx.c TEST_GEN_PROGS_x86_64 = platform_info_test diff --git a/tools/testing/selftests/kvm/cr4_cpuid_sync_test.c b/tools/testing/selftests/kvm/cr4_cpuid_sync_test.c index 11ec358bf969..fd4f419fe9ab 100644 --- a/tools/testing/selftests/kvm/cr4_cpuid_sync_test.c +++ b/tools/testing/selftests/kvm/cr4_cpuid_sync_test.c @@ -67,6 +67,7 @@ int main(int argc, char *argv[]) struct kvm_vm *vm; struct kvm_sregs sregs; struct kvm_cpuid_entry2 *entry; + struct ucall uc; int rc; entry = kvm_get_supported_cpuid_entry(1); @@ -87,21 +88,20 @@ int main(int argc, char *argv[]) rc = _vcpu_run(vm, VCPU_ID); if (run->exit_reason == KVM_EXIT_IO) { - switch (run->io.port) { - case GUEST_PORT_SYNC: + switch (get_ucall(vm, VCPU_ID, &uc)) { + case UCALL_SYNC: /* emulate hypervisor clearing CR4.OSXSAVE */ vcpu_sregs_get(vm, VCPU_ID, &sregs); sregs.cr4 &= ~X86_CR4_OSXSAVE; vcpu_sregs_set(vm, VCPU_ID, &sregs); break; - case GUEST_PORT_ABORT: + case UCALL_ABORT: TEST_ASSERT(false, "Guest CR4 bit (OSXSAVE) unsynchronized with CPUID bit."); break; - case GUEST_PORT_DONE: + case UCALL_DONE: goto done; default: - TEST_ASSERT(false, "Unknown port 0x%x.", - run->io.port); + TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd); } } } diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index 0c2cdc105f96..7cf3e4ae6046 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -110,7 +110,7 @@ void *vcpu_worker(void *data) uint64_t loops, *guest_array, pages_count = 0; struct kvm_vm *vm = data; struct kvm_run *run; - struct guest_args args; + struct ucall uc; run = vcpu_state(vm, VCPU_ID); @@ -124,9 +124,8 @@ void *vcpu_worker(void *data) while (!READ_ONCE(host_quit)) { /* Let the guest to dirty these random pages */ ret = _vcpu_run(vm, VCPU_ID); - guest_args_read(vm, VCPU_ID, &args); if (run->exit_reason == KVM_EXIT_IO && - args.port == GUEST_PORT_SYNC) { + get_ucall(vm, VCPU_ID, &uc) == UCALL_SYNC) { pages_count += TEST_PAGES_PER_LOOP; generate_random_array(guest_array, TEST_PAGES_PER_LOOP); } else { diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 3acf9a91704c..22667dd48a9c 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -152,43 +152,49 @@ allocate_kvm_dirty_log(struct kvm_userspace_memory_region *region); int vm_create_device(struct kvm_vm *vm, struct kvm_create_device *cd); -#define GUEST_PORT_SYNC 0x1000 -#define GUEST_PORT_ABORT 0x1001 -#define GUEST_PORT_DONE 0x1002 - -static inline void __exit_to_l0(uint16_t port, uint64_t arg0, uint64_t arg1) -{ - __asm__ __volatile__("in %[port], %%al" - : - : [port]"d"(port), "D"(arg0), "S"(arg1) - : "rax"); -} - -/* - * Allows to pass three arguments to the host: port is 16bit wide, - * arg0 & arg1 are 64bit wide - */ -#define GUEST_SYNC_ARGS(_port, _arg0, _arg1) \ - __exit_to_l0(_port, (uint64_t) (_arg0), (uint64_t) (_arg1)) - -#define GUEST_ASSERT(_condition) do { \ - if (!(_condition)) \ - GUEST_SYNC_ARGS(GUEST_PORT_ABORT, \ - "Failed guest assert: " \ - #_condition, __LINE__); \ - } while (0) - -#define GUEST_SYNC(stage) GUEST_SYNC_ARGS(GUEST_PORT_SYNC, "hello", stage) +#define sync_global_to_guest(vm, g) ({ \ + typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \ + memcpy(_p, &(g), sizeof(g)); \ +}) + +#define sync_global_from_guest(vm, g) ({ \ + typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \ + memcpy(&(g), _p, sizeof(g)); \ +}) + +/* ucall implementation types */ +typedef enum { + UCALL_PIO, + UCALL_MMIO, +} ucall_type_t; + +/* Common ucalls */ +enum { + UCALL_NONE, + UCALL_SYNC, + UCALL_ABORT, + UCALL_DONE, +}; -#define GUEST_DONE() GUEST_SYNC_ARGS(GUEST_PORT_DONE, 0, 0) +#define UCALL_MAX_ARGS 6 -struct guest_args { - uint64_t arg0; - uint64_t arg1; - uint16_t port; -} __attribute__ ((packed)); +struct ucall { + uint64_t cmd; + uint64_t args[UCALL_MAX_ARGS]; +}; -void guest_args_read(struct kvm_vm *vm, uint32_t vcpu_id, - struct guest_args *args); +void ucall_init(struct kvm_vm *vm, ucall_type_t type, void *arg); +void ucall_uninit(struct kvm_vm *vm); +void ucall(uint64_t cmd, int nargs, ...); +uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc); + +#define GUEST_SYNC(stage) ucall(UCALL_SYNC, 2, "hello", stage) +#define GUEST_DONE() ucall(UCALL_DONE, 0) +#define GUEST_ASSERT(_condition) do { \ + if (!(_condition)) \ + ucall(UCALL_ABORT, 2, \ + "Failed guest assert: " \ + #_condition, __LINE__); \ +} while (0) #endif /* SELFTEST_KVM_UTIL_H */ diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 6fd8c089cafc..4bd31bf04f53 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -133,6 +133,7 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) case VM_MODE_FLAT48PG: vm->page_size = 0x1000; vm->page_shift = 12; + vm->va_bits = 48; /* Limit to 48-bit canonical virtual addresses. */ vm->vpages_valid = sparsebit_alloc(); @@ -1669,17 +1670,3 @@ void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva) { return addr_gpa2hva(vm, addr_gva2gpa(vm, gva)); } - -void guest_args_read(struct kvm_vm *vm, uint32_t vcpu_id, - struct guest_args *args) -{ - struct kvm_run *run = vcpu_state(vm, vcpu_id); - struct kvm_regs regs; - - memset(®s, 0, sizeof(regs)); - vcpu_regs_get(vm, vcpu_id, ®s); - - args->port = run->io.port; - args->arg0 = regs.rdi; - args->arg1 = regs.rsi; -} diff --git a/tools/testing/selftests/kvm/lib/kvm_util_internal.h b/tools/testing/selftests/kvm/lib/kvm_util_internal.h index 542ed606b338..8fa7c9f7567a 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util_internal.h +++ b/tools/testing/selftests/kvm/lib/kvm_util_internal.h @@ -47,6 +47,7 @@ struct kvm_vm { int fd; unsigned int page_size; unsigned int page_shift; + unsigned int va_bits; uint64_t max_gfn; struct vcpu *vcpu_head; struct userspace_mem_region *userspace_mem_region_head; diff --git a/tools/testing/selftests/kvm/lib/ucall.c b/tools/testing/selftests/kvm/lib/ucall.c new file mode 100644 index 000000000000..4777f9bb5194 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/ucall.c @@ -0,0 +1,144 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ucall support. A ucall is a "hypercall to userspace". + * + * Copyright (C) 2018, Red Hat, Inc. + */ +#include "kvm_util.h" +#include "kvm_util_internal.h" + +#define UCALL_PIO_PORT ((uint16_t)0x1000) + +static ucall_type_t ucall_type; +static vm_vaddr_t *ucall_exit_mmio_addr; + +static bool ucall_mmio_init(struct kvm_vm *vm, vm_paddr_t gpa) +{ + if (kvm_userspace_memory_region_find(vm, gpa, gpa + 1)) + return false; + + virt_pg_map(vm, gpa, gpa, 0); + + ucall_exit_mmio_addr = (vm_vaddr_t *)gpa; + sync_global_to_guest(vm, ucall_exit_mmio_addr); + + return true; +} + +void ucall_init(struct kvm_vm *vm, ucall_type_t type, void *arg) +{ + ucall_type = type; + sync_global_to_guest(vm, ucall_type); + + if (type == UCALL_PIO) + return; + + if (type == UCALL_MMIO) { + vm_paddr_t gpa, start, end, step; + bool ret; + + if (arg) { + gpa = (vm_paddr_t)arg; + ret = ucall_mmio_init(vm, gpa); + TEST_ASSERT(ret, "Can't set ucall mmio address to %lx", gpa); + return; + } + + /* + * Find an address within the allowed virtual address space, + * that does _not_ have a KVM memory region associated with it. + * Identity mapping an address like this allows the guest to + * access it, but as KVM doesn't know what to do with it, it + * will assume it's something userspace handles and exit with + * KVM_EXIT_MMIO. Well, at least that's how it works for AArch64. + * Here we start with a guess that the addresses around two + * thirds of the VA space are unmapped and then work both down + * and up from there in 1/6 VA space sized steps. + */ + start = 1ul << (vm->va_bits * 2 / 3); + end = 1ul << vm->va_bits; + step = 1ul << (vm->va_bits / 6); + for (gpa = start; gpa >= 0; gpa -= step) { + if (ucall_mmio_init(vm, gpa & ~(vm->page_size - 1))) + return; + } + for (gpa = start + step; gpa < end; gpa += step) { + if (ucall_mmio_init(vm, gpa & ~(vm->page_size - 1))) + return; + } + TEST_ASSERT(false, "Can't find a ucall mmio address"); + } +} + +void ucall_uninit(struct kvm_vm *vm) +{ + ucall_type = 0; + sync_global_to_guest(vm, ucall_type); + ucall_exit_mmio_addr = 0; + sync_global_to_guest(vm, ucall_exit_mmio_addr); +} + +static void ucall_pio_exit(struct ucall *uc) +{ +#ifdef __x86_64__ + asm volatile("in %[port], %%al" + : : [port] "d" (UCALL_PIO_PORT), "D" (uc) : "rax"); +#endif +} + +static void ucall_mmio_exit(struct ucall *uc) +{ + *ucall_exit_mmio_addr = (vm_vaddr_t)uc; +} + +void ucall(uint64_t cmd, int nargs, ...) +{ + struct ucall uc = { + .cmd = cmd, + }; + va_list va; + int i; + + nargs = nargs <= UCALL_MAX_ARGS ? nargs : UCALL_MAX_ARGS; + + va_start(va, nargs); + for (i = 0; i < nargs; ++i) + uc.args[i] = va_arg(va, uint64_t); + va_end(va); + + switch (ucall_type) { + case UCALL_PIO: + ucall_pio_exit(&uc); + break; + case UCALL_MMIO: + ucall_mmio_exit(&uc); + break; + }; +} + +uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc) +{ + struct kvm_run *run = vcpu_state(vm, vcpu_id); + + memset(uc, 0, sizeof(*uc)); + +#ifdef __x86_64__ + if (ucall_type == UCALL_PIO && run->exit_reason == KVM_EXIT_IO && + run->io.port == UCALL_PIO_PORT) { + struct kvm_regs regs; + vcpu_regs_get(vm, vcpu_id, ®s); + memcpy(uc, addr_gva2hva(vm, (vm_vaddr_t)regs.rdi), sizeof(*uc)); + return uc->cmd; + } +#endif + if (ucall_type == UCALL_MMIO && run->exit_reason == KVM_EXIT_MMIO && + run->mmio.phys_addr == (uint64_t)ucall_exit_mmio_addr) { + vm_vaddr_t gva; + TEST_ASSERT(run->mmio.is_write && run->mmio.len == 8, + "Unexpected ucall exit mmio address access"); + gva = *(vm_vaddr_t *)run->mmio.data; + memcpy(uc, addr_gva2hva(vm, gva), sizeof(*uc)); + } + + return uc->cmd; +} diff --git a/tools/testing/selftests/kvm/platform_info_test.c b/tools/testing/selftests/kvm/platform_info_test.c index 3764e7121265..aa6a14331461 100644 --- a/tools/testing/selftests/kvm/platform_info_test.c +++ b/tools/testing/selftests/kvm/platform_info_test.c @@ -48,7 +48,7 @@ static void set_msr_platform_info_enabled(struct kvm_vm *vm, bool enable) static void test_msr_platform_info_enabled(struct kvm_vm *vm) { struct kvm_run *run = vcpu_state(vm, VCPU_ID); - struct guest_args args; + struct ucall uc; set_msr_platform_info_enabled(vm, true); vcpu_run(vm, VCPU_ID); @@ -56,11 +56,11 @@ static void test_msr_platform_info_enabled(struct kvm_vm *vm) "Exit_reason other than KVM_EXIT_IO: %u (%s),\n", run->exit_reason, exit_reason_str(run->exit_reason)); - guest_args_read(vm, VCPU_ID, &args); - TEST_ASSERT(args.port == GUEST_PORT_SYNC, - "Received IO from port other than PORT_HOST_SYNC: %u\n", - run->io.port); - TEST_ASSERT((args.arg1 & MSR_PLATFORM_INFO_MAX_TURBO_RATIO) == + get_ucall(vm, VCPU_ID, &uc); + TEST_ASSERT(uc.cmd == UCALL_SYNC, + "Received ucall other than UCALL_SYNC: %u\n", + ucall); + TEST_ASSERT((uc.args[1] & MSR_PLATFORM_INFO_MAX_TURBO_RATIO) == MSR_PLATFORM_INFO_MAX_TURBO_RATIO, "Expected MSR_PLATFORM_INFO to have max turbo ratio mask: %i.", MSR_PLATFORM_INFO_MAX_TURBO_RATIO); diff --git a/tools/testing/selftests/kvm/state_test.c b/tools/testing/selftests/kvm/state_test.c index 900e3e9dfb9f..cdf82735d6e7 100644 --- a/tools/testing/selftests/kvm/state_test.c +++ b/tools/testing/selftests/kvm/state_test.c @@ -127,6 +127,7 @@ int main(int argc, char *argv[]) struct kvm_vm *vm; struct kvm_run *run; struct kvm_x86_state *state; + struct ucall uc; int stage; struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1); @@ -155,23 +156,23 @@ int main(int argc, char *argv[]) memset(®s1, 0, sizeof(regs1)); vcpu_regs_get(vm, VCPU_ID, ®s1); - switch (run->io.port) { - case GUEST_PORT_ABORT: - TEST_ASSERT(false, "%s at %s:%d", (const char *) regs1.rdi, - __FILE__, regs1.rsi); + switch (get_ucall(vm, VCPU_ID, &uc)) { + case UCALL_ABORT: + TEST_ASSERT(false, "%s at %s:%d", (const char *)uc.args[0], + __FILE__, uc.args[1]); /* NOT REACHED */ - case GUEST_PORT_SYNC: + case UCALL_SYNC: break; - case GUEST_PORT_DONE: + case UCALL_DONE: goto done; default: - TEST_ASSERT(false, "Unknown port 0x%x.", run->io.port); + TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd); } - /* PORT_SYNC is handled here. */ - TEST_ASSERT(!strcmp((const char *)regs1.rdi, "hello") && - regs1.rsi == stage, "Unexpected register values vmexit #%lx, got %lx", - stage, (ulong) regs1.rsi); + /* UCALL_SYNC is handled here. */ + TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") && + uc.args[1] == stage, "Unexpected register values vmexit #%lx, got %lx", + stage, (ulong)uc.args[1]); state = vcpu_save_state(vm, VCPU_ID); kvm_vm_release(vm); diff --git a/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c index 49bcc68b0235..8d487c78796a 100644 --- a/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c +++ b/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c @@ -146,26 +146,25 @@ int main(int argc, char *argv[]) for (;;) { volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID); - struct guest_args args; + struct ucall uc; vcpu_run(vm, VCPU_ID); - guest_args_read(vm, VCPU_ID, &args); TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n", run->exit_reason, exit_reason_str(run->exit_reason)); - switch (args.port) { - case GUEST_PORT_ABORT: - TEST_ASSERT(false, "%s", (const char *) args.arg0); + switch (get_ucall(vm, VCPU_ID, &uc)) { + case UCALL_ABORT: + TEST_ASSERT(false, "%s", (const char *)uc.args[0]); /* NOT REACHED */ - case GUEST_PORT_SYNC: - report(args.arg1); + case UCALL_SYNC: + report(uc.args[1]); break; - case GUEST_PORT_DONE: + case UCALL_DONE: goto done; default: - TEST_ASSERT(false, "Unknown port 0x%x.", args.port); + TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd); } } -- cgit v1.2.3 From cc68765d418721ab854a03626c01e8eb82711922 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 18 Sep 2018 19:54:26 +0200 Subject: kvm: selftests: move arch-specific files to arch-specific locations Signed-off-by: Andrew Jones Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/.gitignore | 13 +- tools/testing/selftests/kvm/Makefile | 22 +- tools/testing/selftests/kvm/cr4_cpuid_sync_test.c | 113 --- tools/testing/selftests/kvm/dirty_log_test.c | 307 ------ tools/testing/selftests/kvm/include/kvm_util.h | 2 +- tools/testing/selftests/kvm/include/sparsebit.h | 6 +- tools/testing/selftests/kvm/include/test_util.h | 6 +- tools/testing/selftests/kvm/include/vmx.h | 552 ----------- tools/testing/selftests/kvm/include/x86.h | 1047 -------------------- .../selftests/kvm/include/x86_64/processor.h | 1047 ++++++++++++++++++++ tools/testing/selftests/kvm/include/x86_64/vmx.h | 552 +++++++++++ tools/testing/selftests/kvm/lib/assert.c | 2 +- .../testing/selftests/kvm/lib/kvm_util_internal.h | 8 +- tools/testing/selftests/kvm/lib/vmx.c | 283 ------ tools/testing/selftests/kvm/lib/x86.c | 888 ----------------- tools/testing/selftests/kvm/lib/x86_64/processor.c | 888 +++++++++++++++++ tools/testing/selftests/kvm/lib/x86_64/vmx.c | 283 ++++++ tools/testing/selftests/kvm/platform_info_test.c | 110 -- tools/testing/selftests/kvm/set_sregs_test.c | 54 - tools/testing/selftests/kvm/state_test.c | 197 ---- tools/testing/selftests/kvm/sync_regs_test.c | 237 ----- tools/testing/selftests/kvm/vmx_tsc_adjust_test.c | 174 ---- .../selftests/kvm/x86_64/cr4_cpuid_sync_test.c | 113 +++ .../testing/selftests/kvm/x86_64/dirty_log_test.c | 307 ++++++ .../selftests/kvm/x86_64/platform_info_test.c | 110 ++ .../testing/selftests/kvm/x86_64/set_sregs_test.c | 54 + tools/testing/selftests/kvm/x86_64/state_test.c | 197 ++++ .../testing/selftests/kvm/x86_64/sync_regs_test.c | 237 +++++ .../selftests/kvm/x86_64/vmx_tsc_adjust_test.c | 174 ++++ 29 files changed, 3992 insertions(+), 3991 deletions(-) delete mode 100644 tools/testing/selftests/kvm/cr4_cpuid_sync_test.c delete mode 100644 tools/testing/selftests/kvm/dirty_log_test.c delete mode 100644 tools/testing/selftests/kvm/include/vmx.h delete mode 100644 tools/testing/selftests/kvm/include/x86.h create mode 100644 tools/testing/selftests/kvm/include/x86_64/processor.h create mode 100644 tools/testing/selftests/kvm/include/x86_64/vmx.h delete mode 100644 tools/testing/selftests/kvm/lib/vmx.c delete mode 100644 tools/testing/selftests/kvm/lib/x86.c create mode 100644 tools/testing/selftests/kvm/lib/x86_64/processor.c create mode 100644 tools/testing/selftests/kvm/lib/x86_64/vmx.c delete mode 100644 tools/testing/selftests/kvm/platform_info_test.c delete mode 100644 tools/testing/selftests/kvm/set_sregs_test.c delete mode 100644 tools/testing/selftests/kvm/state_test.c delete mode 100644 tools/testing/selftests/kvm/sync_regs_test.c delete mode 100644 tools/testing/selftests/kvm/vmx_tsc_adjust_test.c create mode 100644 tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c create mode 100644 tools/testing/selftests/kvm/x86_64/dirty_log_test.c create mode 100644 tools/testing/selftests/kvm/x86_64/platform_info_test.c create mode 100644 tools/testing/selftests/kvm/x86_64/set_sregs_test.c create mode 100644 tools/testing/selftests/kvm/x86_64/state_test.c create mode 100644 tools/testing/selftests/kvm/x86_64/sync_regs_test.c create mode 100644 tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c (limited to 'tools') diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 5c34752e1cff..e0aa5411cf9b 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -1,6 +1,7 @@ -cr4_cpuid_sync_test -platform_info_test -set_sregs_test -sync_regs_test -vmx_tsc_adjust_test -state_test +/x86_64/cr4_cpuid_sync_test +/x86_64/platform_info_test +/x86_64/set_sregs_test +/x86_64/sync_regs_test +/x86_64/vmx_tsc_adjust_test +/x86_64/state_test +/x86_64/dirty_log_test diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 08426f036353..734a60da9776 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -1,26 +1,26 @@ all: -top_srcdir = ../../../../ +top_srcdir = ../../../.. UNAME_M := $(shell uname -m) LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/ucall.c lib/sparsebit.c -LIBKVM_x86_64 = lib/x86.c lib/vmx.c +LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c -TEST_GEN_PROGS_x86_64 = platform_info_test -TEST_GEN_PROGS_x86_64 += set_sregs_test -TEST_GEN_PROGS_x86_64 += sync_regs_test -TEST_GEN_PROGS_x86_64 += vmx_tsc_adjust_test -TEST_GEN_PROGS_x86_64 += cr4_cpuid_sync_test -TEST_GEN_PROGS_x86_64 += state_test -TEST_GEN_PROGS_x86_64 += dirty_log_test +TEST_GEN_PROGS_x86_64 = x86_64/platform_info_test +TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test +TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test +TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test +TEST_GEN_PROGS_x86_64 += x86_64/cr4_cpuid_sync_test +TEST_GEN_PROGS_x86_64 += x86_64/state_test +TEST_GEN_PROGS_x86_64 += x86_64/dirty_log_test TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M)) LIBKVM += $(LIBKVM_$(UNAME_M)) INSTALL_HDR_PATH = $(top_srcdir)/usr LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/ -LINUX_TOOL_INCLUDE = $(top_srcdir)tools/include -CFLAGS += -O2 -g -std=gnu99 -I$(LINUX_TOOL_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude -I$( - */ - -#include -#include -#include -#include -#include - -#include "test_util.h" - -#include "kvm_util.h" -#include "x86.h" - -#define X86_FEATURE_XSAVE (1<<26) -#define X86_FEATURE_OSXSAVE (1<<27) -#define VCPU_ID 1 - -static inline bool cr4_cpuid_is_sync(void) -{ - int func, subfunc; - uint32_t eax, ebx, ecx, edx; - uint64_t cr4; - - func = 0x1; - subfunc = 0x0; - __asm__ __volatile__("cpuid" - : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx) - : "a"(func), "c"(subfunc)); - - cr4 = get_cr4(); - - return (!!(ecx & X86_FEATURE_OSXSAVE)) == (!!(cr4 & X86_CR4_OSXSAVE)); -} - -static void guest_code(void) -{ - uint64_t cr4; - - /* turn on CR4.OSXSAVE */ - cr4 = get_cr4(); - cr4 |= X86_CR4_OSXSAVE; - set_cr4(cr4); - - /* verify CR4.OSXSAVE == CPUID.OSXSAVE */ - GUEST_ASSERT(cr4_cpuid_is_sync()); - - /* notify hypervisor to change CR4 */ - GUEST_SYNC(0); - - /* check again */ - GUEST_ASSERT(cr4_cpuid_is_sync()); - - GUEST_DONE(); -} - -int main(int argc, char *argv[]) -{ - struct kvm_run *run; - struct kvm_vm *vm; - struct kvm_sregs sregs; - struct kvm_cpuid_entry2 *entry; - struct ucall uc; - int rc; - - entry = kvm_get_supported_cpuid_entry(1); - if (!(entry->ecx & X86_FEATURE_XSAVE)) { - printf("XSAVE feature not supported, skipping test\n"); - return 0; - } - - /* Tell stdout not to buffer its content */ - setbuf(stdout, NULL); - - /* Create VM */ - vm = vm_create_default(VCPU_ID, 0, guest_code); - vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); - run = vcpu_state(vm, VCPU_ID); - - while (1) { - rc = _vcpu_run(vm, VCPU_ID); - - if (run->exit_reason == KVM_EXIT_IO) { - switch (get_ucall(vm, VCPU_ID, &uc)) { - case UCALL_SYNC: - /* emulate hypervisor clearing CR4.OSXSAVE */ - vcpu_sregs_get(vm, VCPU_ID, &sregs); - sregs.cr4 &= ~X86_CR4_OSXSAVE; - vcpu_sregs_set(vm, VCPU_ID, &sregs); - break; - case UCALL_ABORT: - TEST_ASSERT(false, "Guest CR4 bit (OSXSAVE) unsynchronized with CPUID bit."); - break; - case UCALL_DONE: - goto done; - default: - TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd); - } - } - } - - kvm_vm_free(vm); - -done: - return 0; -} diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c deleted file mode 100644 index 7cf3e4ae6046..000000000000 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ /dev/null @@ -1,307 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * KVM dirty page logging test - * - * Copyright (C) 2018, Red Hat, Inc. - */ - -#include -#include -#include -#include -#include -#include -#include - -#include "test_util.h" -#include "kvm_util.h" - -#define DEBUG printf - -#define VCPU_ID 1 -/* The memory slot index to track dirty pages */ -#define TEST_MEM_SLOT_INDEX 1 -/* - * GPA offset of the testing memory slot. Must be bigger than the - * default vm mem slot, which is DEFAULT_GUEST_PHY_PAGES. - */ -#define TEST_MEM_OFFSET (1ULL << 30) /* 1G */ -/* Size of the testing memory slot */ -#define TEST_MEM_PAGES (1ULL << 18) /* 1G for 4K pages */ -/* How many pages to dirty for each guest loop */ -#define TEST_PAGES_PER_LOOP 1024 -/* How many host loops to run (one KVM_GET_DIRTY_LOG for each loop) */ -#define TEST_HOST_LOOP_N 32 -/* Interval for each host loop (ms) */ -#define TEST_HOST_LOOP_INTERVAL 10 - -/* - * Guest variables. We use these variables to share data between host - * and guest. There are two copies of the variables, one in host memory - * (which is unused) and one in guest memory. When the host wants to - * access these variables, it needs to call addr_gva2hva() to access the - * guest copy. - */ -uint64_t guest_random_array[TEST_PAGES_PER_LOOP]; -uint64_t guest_iteration; -uint64_t guest_page_size; - -/* - * Writes to the first byte of a random page within the testing memory - * region continuously. - */ -void guest_code(void) -{ - int i = 0; - uint64_t volatile *array = guest_random_array; - uint64_t volatile *guest_addr; - - while (true) { - for (i = 0; i < TEST_PAGES_PER_LOOP; i++) { - /* - * Write to the first 8 bytes of a random page - * on the testing memory region. - */ - guest_addr = (uint64_t *) - (TEST_MEM_OFFSET + - (array[i] % TEST_MEM_PAGES) * guest_page_size); - *guest_addr = guest_iteration; - } - /* Tell the host that we need more random numbers */ - GUEST_SYNC(1); - } -} - -/* - * Host variables. These variables should only be used by the host - * rather than the guest. - */ -bool host_quit; - -/* Points to the test VM memory region on which we track dirty logs */ -void *host_test_mem; - -/* For statistics only */ -uint64_t host_dirty_count; -uint64_t host_clear_count; -uint64_t host_track_next_count; - -/* - * We use this bitmap to track some pages that should have its dirty - * bit set in the _next_ iteration. For example, if we detected the - * page value changed to current iteration but at the same time the - * page bit is cleared in the latest bitmap, then the system must - * report that write in the next get dirty log call. - */ -unsigned long *host_bmap_track; - -void generate_random_array(uint64_t *guest_array, uint64_t size) -{ - uint64_t i; - - for (i = 0; i < size; i++) { - guest_array[i] = random(); - } -} - -void *vcpu_worker(void *data) -{ - int ret; - uint64_t loops, *guest_array, pages_count = 0; - struct kvm_vm *vm = data; - struct kvm_run *run; - struct ucall uc; - - run = vcpu_state(vm, VCPU_ID); - - /* Retrieve the guest random array pointer and cache it */ - guest_array = addr_gva2hva(vm, (vm_vaddr_t)guest_random_array); - - DEBUG("VCPU starts\n"); - - generate_random_array(guest_array, TEST_PAGES_PER_LOOP); - - while (!READ_ONCE(host_quit)) { - /* Let the guest to dirty these random pages */ - ret = _vcpu_run(vm, VCPU_ID); - if (run->exit_reason == KVM_EXIT_IO && - get_ucall(vm, VCPU_ID, &uc) == UCALL_SYNC) { - pages_count += TEST_PAGES_PER_LOOP; - generate_random_array(guest_array, TEST_PAGES_PER_LOOP); - } else { - TEST_ASSERT(false, - "Invalid guest sync status: " - "exit_reason=%s\n", - exit_reason_str(run->exit_reason)); - } - } - - DEBUG("VCPU exits, dirtied %"PRIu64" pages\n", pages_count); - - return NULL; -} - -void vm_dirty_log_verify(unsigned long *bmap, uint64_t iteration) -{ - uint64_t page; - uint64_t volatile *value_ptr; - - for (page = 0; page < TEST_MEM_PAGES; page++) { - value_ptr = host_test_mem + page * getpagesize(); - - /* If this is a special page that we were tracking... */ - if (test_and_clear_bit(page, host_bmap_track)) { - host_track_next_count++; - TEST_ASSERT(test_bit(page, bmap), - "Page %"PRIu64" should have its dirty bit " - "set in this iteration but it is missing", - page); - } - - if (test_bit(page, bmap)) { - host_dirty_count++; - /* - * If the bit is set, the value written onto - * the corresponding page should be either the - * previous iteration number or the current one. - */ - TEST_ASSERT(*value_ptr == iteration || - *value_ptr == iteration - 1, - "Set page %"PRIu64" value %"PRIu64 - " incorrect (iteration=%"PRIu64")", - page, *value_ptr, iteration); - } else { - host_clear_count++; - /* - * If cleared, the value written can be any - * value smaller or equals to the iteration - * number. Note that the value can be exactly - * (iteration-1) if that write can happen - * like this: - * - * (1) increase loop count to "iteration-1" - * (2) write to page P happens (with value - * "iteration-1") - * (3) get dirty log for "iteration-1"; we'll - * see that page P bit is set (dirtied), - * and not set the bit in host_bmap_track - * (4) increase loop count to "iteration" - * (which is current iteration) - * (5) get dirty log for current iteration, - * we'll see that page P is cleared, with - * value "iteration-1". - */ - TEST_ASSERT(*value_ptr <= iteration, - "Clear page %"PRIu64" value %"PRIu64 - " incorrect (iteration=%"PRIu64")", - page, *value_ptr, iteration); - if (*value_ptr == iteration) { - /* - * This page is _just_ modified; it - * should report its dirtyness in the - * next run - */ - set_bit(page, host_bmap_track); - } - } - } -} - -void help(char *name) -{ - puts(""); - printf("usage: %s [-i iterations] [-I interval] [-h]\n", name); - puts(""); - printf(" -i: specify iteration counts (default: %"PRIu64")\n", - TEST_HOST_LOOP_N); - printf(" -I: specify interval in ms (default: %"PRIu64" ms)\n", - TEST_HOST_LOOP_INTERVAL); - puts(""); - exit(0); -} - -int main(int argc, char *argv[]) -{ - pthread_t vcpu_thread; - struct kvm_vm *vm; - uint64_t volatile *psize, *iteration; - unsigned long *bmap, iterations = TEST_HOST_LOOP_N, - interval = TEST_HOST_LOOP_INTERVAL; - int opt; - - while ((opt = getopt(argc, argv, "hi:I:")) != -1) { - switch (opt) { - case 'i': - iterations = strtol(optarg, NULL, 10); - break; - case 'I': - interval = strtol(optarg, NULL, 10); - break; - case 'h': - default: - help(argv[0]); - break; - } - } - - TEST_ASSERT(iterations > 2, "Iteration must be bigger than zero\n"); - TEST_ASSERT(interval > 0, "Interval must be bigger than zero"); - - DEBUG("Test iterations: %"PRIu64", interval: %"PRIu64" (ms)\n", - iterations, interval); - - srandom(time(0)); - - bmap = bitmap_alloc(TEST_MEM_PAGES); - host_bmap_track = bitmap_alloc(TEST_MEM_PAGES); - - vm = vm_create_default(VCPU_ID, TEST_MEM_PAGES, guest_code); - - /* Add an extra memory slot for testing dirty logging */ - vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, - TEST_MEM_OFFSET, - TEST_MEM_SLOT_INDEX, - TEST_MEM_PAGES, - KVM_MEM_LOG_DIRTY_PAGES); - /* Cache the HVA pointer of the region */ - host_test_mem = addr_gpa2hva(vm, (vm_paddr_t)TEST_MEM_OFFSET); - - /* Do 1:1 mapping for the dirty track memory slot */ - virt_map(vm, TEST_MEM_OFFSET, TEST_MEM_OFFSET, - TEST_MEM_PAGES * getpagesize(), 0); - - vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); - - /* Tell the guest about the page size on the system */ - psize = addr_gva2hva(vm, (vm_vaddr_t)&guest_page_size); - *psize = getpagesize(); - - /* Start the iterations */ - iteration = addr_gva2hva(vm, (vm_vaddr_t)&guest_iteration); - *iteration = 1; - - /* Start dirtying pages */ - pthread_create(&vcpu_thread, NULL, vcpu_worker, vm); - - while (*iteration < iterations) { - /* Give the vcpu thread some time to dirty some pages */ - usleep(interval * 1000); - kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap); - vm_dirty_log_verify(bmap, *iteration); - (*iteration)++; - } - - /* Tell the vcpu thread to quit */ - host_quit = true; - pthread_join(vcpu_thread, NULL); - - DEBUG("Total bits checked: dirty (%"PRIu64"), clear (%"PRIu64"), " - "track_next (%"PRIu64")\n", host_dirty_count, host_clear_count, - host_track_next_count); - - free(bmap); - free(host_bmap_track); - kvm_vm_free(vm); - - return 0; -} diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 22667dd48a9c..7830c46f27d8 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -7,7 +7,7 @@ * */ #ifndef SELFTEST_KVM_UTIL_H -#define SELFTEST_KVM_UTIL_H 1 +#define SELFTEST_KVM_UTIL_H #include "test_util.h" diff --git a/tools/testing/selftests/kvm/include/sparsebit.h b/tools/testing/selftests/kvm/include/sparsebit.h index 54cfeb6568d3..31e030915c1f 100644 --- a/tools/testing/selftests/kvm/include/sparsebit.h +++ b/tools/testing/selftests/kvm/include/sparsebit.h @@ -15,8 +15,8 @@ * even in the case where most bits are set. */ -#ifndef _TEST_SPARSEBIT_H_ -#define _TEST_SPARSEBIT_H_ +#ifndef SELFTEST_KVM_SPARSEBIT_H +#define SELFTEST_KVM_SPARSEBIT_H #include #include @@ -72,4 +72,4 @@ void sparsebit_validate_internal(struct sparsebit *sbit); } #endif -#endif /* _TEST_SPARSEBIT_H_ */ +#endif /* SELFTEST_KVM_SPARSEBIT_H */ diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h index 73c3933436ec..c7dafe8bd02c 100644 --- a/tools/testing/selftests/kvm/include/test_util.h +++ b/tools/testing/selftests/kvm/include/test_util.h @@ -7,8 +7,8 @@ * */ -#ifndef TEST_UTIL_H -#define TEST_UTIL_H 1 +#ifndef SELFTEST_KVM_TEST_UTIL_H +#define SELFTEST_KVM_TEST_UTIL_H #include #include @@ -41,4 +41,4 @@ void test_assert(bool exp, const char *exp_str, #a, #b, #a, (unsigned long) __a, #b, (unsigned long) __b); \ } while (0) -#endif /* TEST_UTIL_H */ +#endif /* SELFTEST_KVM_TEST_UTIL_H */ diff --git a/tools/testing/selftests/kvm/include/vmx.h b/tools/testing/selftests/kvm/include/vmx.h deleted file mode 100644 index b9ffe1024d3a..000000000000 --- a/tools/testing/selftests/kvm/include/vmx.h +++ /dev/null @@ -1,552 +0,0 @@ -/* - * tools/testing/selftests/kvm/include/vmx.h - * - * Copyright (C) 2018, Google LLC. - * - * This work is licensed under the terms of the GNU GPL, version 2. - * - */ - -#ifndef SELFTEST_KVM_VMX_H -#define SELFTEST_KVM_VMX_H - -#include -#include "x86.h" - -#define CPUID_VMX_BIT 5 - -#define CPUID_VMX (1 << 5) - -/* - * Definitions of Primary Processor-Based VM-Execution Controls. - */ -#define CPU_BASED_VIRTUAL_INTR_PENDING 0x00000004 -#define CPU_BASED_USE_TSC_OFFSETING 0x00000008 -#define CPU_BASED_HLT_EXITING 0x00000080 -#define CPU_BASED_INVLPG_EXITING 0x00000200 -#define CPU_BASED_MWAIT_EXITING 0x00000400 -#define CPU_BASED_RDPMC_EXITING 0x00000800 -#define CPU_BASED_RDTSC_EXITING 0x00001000 -#define CPU_BASED_CR3_LOAD_EXITING 0x00008000 -#define CPU_BASED_CR3_STORE_EXITING 0x00010000 -#define CPU_BASED_CR8_LOAD_EXITING 0x00080000 -#define CPU_BASED_CR8_STORE_EXITING 0x00100000 -#define CPU_BASED_TPR_SHADOW 0x00200000 -#define CPU_BASED_VIRTUAL_NMI_PENDING 0x00400000 -#define CPU_BASED_MOV_DR_EXITING 0x00800000 -#define CPU_BASED_UNCOND_IO_EXITING 0x01000000 -#define CPU_BASED_USE_IO_BITMAPS 0x02000000 -#define CPU_BASED_MONITOR_TRAP 0x08000000 -#define CPU_BASED_USE_MSR_BITMAPS 0x10000000 -#define CPU_BASED_MONITOR_EXITING 0x20000000 -#define CPU_BASED_PAUSE_EXITING 0x40000000 -#define CPU_BASED_ACTIVATE_SECONDARY_CONTROLS 0x80000000 - -#define CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x0401e172 - -/* - * Definitions of Secondary Processor-Based VM-Execution Controls. - */ -#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001 -#define SECONDARY_EXEC_ENABLE_EPT 0x00000002 -#define SECONDARY_EXEC_DESC 0x00000004 -#define SECONDARY_EXEC_RDTSCP 0x00000008 -#define SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE 0x00000010 -#define SECONDARY_EXEC_ENABLE_VPID 0x00000020 -#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 -#define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080 -#define SECONDARY_EXEC_APIC_REGISTER_VIRT 0x00000100 -#define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY 0x00000200 -#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 -#define SECONDARY_EXEC_RDRAND_EXITING 0x00000800 -#define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000 -#define SECONDARY_EXEC_ENABLE_VMFUNC 0x00002000 -#define SECONDARY_EXEC_SHADOW_VMCS 0x00004000 -#define SECONDARY_EXEC_RDSEED_EXITING 0x00010000 -#define SECONDARY_EXEC_ENABLE_PML 0x00020000 -#define SECONDARY_EPT_VE 0x00040000 -#define SECONDARY_ENABLE_XSAV_RESTORE 0x00100000 -#define SECONDARY_EXEC_TSC_SCALING 0x02000000 - -#define PIN_BASED_EXT_INTR_MASK 0x00000001 -#define PIN_BASED_NMI_EXITING 0x00000008 -#define PIN_BASED_VIRTUAL_NMIS 0x00000020 -#define PIN_BASED_VMX_PREEMPTION_TIMER 0x00000040 -#define PIN_BASED_POSTED_INTR 0x00000080 - -#define PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x00000016 - -#define VM_EXIT_SAVE_DEBUG_CONTROLS 0x00000004 -#define VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200 -#define VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL 0x00001000 -#define VM_EXIT_ACK_INTR_ON_EXIT 0x00008000 -#define VM_EXIT_SAVE_IA32_PAT 0x00040000 -#define VM_EXIT_LOAD_IA32_PAT 0x00080000 -#define VM_EXIT_SAVE_IA32_EFER 0x00100000 -#define VM_EXIT_LOAD_IA32_EFER 0x00200000 -#define VM_EXIT_SAVE_VMX_PREEMPTION_TIMER 0x00400000 - -#define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR 0x00036dff - -#define VM_ENTRY_LOAD_DEBUG_CONTROLS 0x00000004 -#define VM_ENTRY_IA32E_MODE 0x00000200 -#define VM_ENTRY_SMM 0x00000400 -#define VM_ENTRY_DEACT_DUAL_MONITOR 0x00000800 -#define VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL 0x00002000 -#define VM_ENTRY_LOAD_IA32_PAT 0x00004000 -#define VM_ENTRY_LOAD_IA32_EFER 0x00008000 - -#define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR 0x000011ff - -#define VMX_MISC_PREEMPTION_TIMER_RATE_MASK 0x0000001f -#define VMX_MISC_SAVE_EFER_LMA 0x00000020 - -#define EXIT_REASON_FAILED_VMENTRY 0x80000000 -#define EXIT_REASON_EXCEPTION_NMI 0 -#define EXIT_REASON_EXTERNAL_INTERRUPT 1 -#define EXIT_REASON_TRIPLE_FAULT 2 -#define EXIT_REASON_PENDING_INTERRUPT 7 -#define EXIT_REASON_NMI_WINDOW 8 -#define EXIT_REASON_TASK_SWITCH 9 -#define EXIT_REASON_CPUID 10 -#define EXIT_REASON_HLT 12 -#define EXIT_REASON_INVD 13 -#define EXIT_REASON_INVLPG 14 -#define EXIT_REASON_RDPMC 15 -#define EXIT_REASON_RDTSC 16 -#define EXIT_REASON_VMCALL 18 -#define EXIT_REASON_VMCLEAR 19 -#define EXIT_REASON_VMLAUNCH 20 -#define EXIT_REASON_VMPTRLD 21 -#define EXIT_REASON_VMPTRST 22 -#define EXIT_REASON_VMREAD 23 -#define EXIT_REASON_VMRESUME 24 -#define EXIT_REASON_VMWRITE 25 -#define EXIT_REASON_VMOFF 26 -#define EXIT_REASON_VMON 27 -#define EXIT_REASON_CR_ACCESS 28 -#define EXIT_REASON_DR_ACCESS 29 -#define EXIT_REASON_IO_INSTRUCTION 30 -#define EXIT_REASON_MSR_READ 31 -#define EXIT_REASON_MSR_WRITE 32 -#define EXIT_REASON_INVALID_STATE 33 -#define EXIT_REASON_MWAIT_INSTRUCTION 36 -#define EXIT_REASON_MONITOR_INSTRUCTION 39 -#define EXIT_REASON_PAUSE_INSTRUCTION 40 -#define EXIT_REASON_MCE_DURING_VMENTRY 41 -#define EXIT_REASON_TPR_BELOW_THRESHOLD 43 -#define EXIT_REASON_APIC_ACCESS 44 -#define EXIT_REASON_EOI_INDUCED 45 -#define EXIT_REASON_EPT_VIOLATION 48 -#define EXIT_REASON_EPT_MISCONFIG 49 -#define EXIT_REASON_INVEPT 50 -#define EXIT_REASON_RDTSCP 51 -#define EXIT_REASON_PREEMPTION_TIMER 52 -#define EXIT_REASON_INVVPID 53 -#define EXIT_REASON_WBINVD 54 -#define EXIT_REASON_XSETBV 55 -#define EXIT_REASON_APIC_WRITE 56 -#define EXIT_REASON_INVPCID 58 -#define EXIT_REASON_PML_FULL 62 -#define EXIT_REASON_XSAVES 63 -#define EXIT_REASON_XRSTORS 64 -#define LAST_EXIT_REASON 64 - -enum vmcs_field { - VIRTUAL_PROCESSOR_ID = 0x00000000, - POSTED_INTR_NV = 0x00000002, - GUEST_ES_SELECTOR = 0x00000800, - GUEST_CS_SELECTOR = 0x00000802, - GUEST_SS_SELECTOR = 0x00000804, - GUEST_DS_SELECTOR = 0x00000806, - GUEST_FS_SELECTOR = 0x00000808, - GUEST_GS_SELECTOR = 0x0000080a, - GUEST_LDTR_SELECTOR = 0x0000080c, - GUEST_TR_SELECTOR = 0x0000080e, - GUEST_INTR_STATUS = 0x00000810, - GUEST_PML_INDEX = 0x00000812, - HOST_ES_SELECTOR = 0x00000c00, - HOST_CS_SELECTOR = 0x00000c02, - HOST_SS_SELECTOR = 0x00000c04, - HOST_DS_SELECTOR = 0x00000c06, - HOST_FS_SELECTOR = 0x00000c08, - HOST_GS_SELECTOR = 0x00000c0a, - HOST_TR_SELECTOR = 0x00000c0c, - IO_BITMAP_A = 0x00002000, - IO_BITMAP_A_HIGH = 0x00002001, - IO_BITMAP_B = 0x00002002, - IO_BITMAP_B_HIGH = 0x00002003, - MSR_BITMAP = 0x00002004, - MSR_BITMAP_HIGH = 0x00002005, - VM_EXIT_MSR_STORE_ADDR = 0x00002006, - VM_EXIT_MSR_STORE_ADDR_HIGH = 0x00002007, - VM_EXIT_MSR_LOAD_ADDR = 0x00002008, - VM_EXIT_MSR_LOAD_ADDR_HIGH = 0x00002009, - VM_ENTRY_MSR_LOAD_ADDR = 0x0000200a, - VM_ENTRY_MSR_LOAD_ADDR_HIGH = 0x0000200b, - PML_ADDRESS = 0x0000200e, - PML_ADDRESS_HIGH = 0x0000200f, - TSC_OFFSET = 0x00002010, - TSC_OFFSET_HIGH = 0x00002011, - VIRTUAL_APIC_PAGE_ADDR = 0x00002012, - VIRTUAL_APIC_PAGE_ADDR_HIGH = 0x00002013, - APIC_ACCESS_ADDR = 0x00002014, - APIC_ACCESS_ADDR_HIGH = 0x00002015, - POSTED_INTR_DESC_ADDR = 0x00002016, - POSTED_INTR_DESC_ADDR_HIGH = 0x00002017, - EPT_POINTER = 0x0000201a, - EPT_POINTER_HIGH = 0x0000201b, - EOI_EXIT_BITMAP0 = 0x0000201c, - EOI_EXIT_BITMAP0_HIGH = 0x0000201d, - EOI_EXIT_BITMAP1 = 0x0000201e, - EOI_EXIT_BITMAP1_HIGH = 0x0000201f, - EOI_EXIT_BITMAP2 = 0x00002020, - EOI_EXIT_BITMAP2_HIGH = 0x00002021, - EOI_EXIT_BITMAP3 = 0x00002022, - EOI_EXIT_BITMAP3_HIGH = 0x00002023, - VMREAD_BITMAP = 0x00002026, - VMREAD_BITMAP_HIGH = 0x00002027, - VMWRITE_BITMAP = 0x00002028, - VMWRITE_BITMAP_HIGH = 0x00002029, - XSS_EXIT_BITMAP = 0x0000202C, - XSS_EXIT_BITMAP_HIGH = 0x0000202D, - TSC_MULTIPLIER = 0x00002032, - TSC_MULTIPLIER_HIGH = 0x00002033, - GUEST_PHYSICAL_ADDRESS = 0x00002400, - GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401, - VMCS_LINK_POINTER = 0x00002800, - VMCS_LINK_POINTER_HIGH = 0x00002801, - GUEST_IA32_DEBUGCTL = 0x00002802, - GUEST_IA32_DEBUGCTL_HIGH = 0x00002803, - GUEST_IA32_PAT = 0x00002804, - GUEST_IA32_PAT_HIGH = 0x00002805, - GUEST_IA32_EFER = 0x00002806, - GUEST_IA32_EFER_HIGH = 0x00002807, - GUEST_IA32_PERF_GLOBAL_CTRL = 0x00002808, - GUEST_IA32_PERF_GLOBAL_CTRL_HIGH= 0x00002809, - GUEST_PDPTR0 = 0x0000280a, - GUEST_PDPTR0_HIGH = 0x0000280b, - GUEST_PDPTR1 = 0x0000280c, - GUEST_PDPTR1_HIGH = 0x0000280d, - GUEST_PDPTR2 = 0x0000280e, - GUEST_PDPTR2_HIGH = 0x0000280f, - GUEST_PDPTR3 = 0x00002810, - GUEST_PDPTR3_HIGH = 0x00002811, - GUEST_BNDCFGS = 0x00002812, - GUEST_BNDCFGS_HIGH = 0x00002813, - HOST_IA32_PAT = 0x00002c00, - HOST_IA32_PAT_HIGH = 0x00002c01, - HOST_IA32_EFER = 0x00002c02, - HOST_IA32_EFER_HIGH = 0x00002c03, - HOST_IA32_PERF_GLOBAL_CTRL = 0x00002c04, - HOST_IA32_PERF_GLOBAL_CTRL_HIGH = 0x00002c05, - PIN_BASED_VM_EXEC_CONTROL = 0x00004000, - CPU_BASED_VM_EXEC_CONTROL = 0x00004002, - EXCEPTION_BITMAP = 0x00004004, - PAGE_FAULT_ERROR_CODE_MASK = 0x00004006, - PAGE_FAULT_ERROR_CODE_MATCH = 0x00004008, - CR3_TARGET_COUNT = 0x0000400a, - VM_EXIT_CONTROLS = 0x0000400c, - VM_EXIT_MSR_STORE_COUNT = 0x0000400e, - VM_EXIT_MSR_LOAD_COUNT = 0x00004010, - VM_ENTRY_CONTROLS = 0x00004012, - VM_ENTRY_MSR_LOAD_COUNT = 0x00004014, - VM_ENTRY_INTR_INFO_FIELD = 0x00004016, - VM_ENTRY_EXCEPTION_ERROR_CODE = 0x00004018, - VM_ENTRY_INSTRUCTION_LEN = 0x0000401a, - TPR_THRESHOLD = 0x0000401c, - SECONDARY_VM_EXEC_CONTROL = 0x0000401e, - PLE_GAP = 0x00004020, - PLE_WINDOW = 0x00004022, - VM_INSTRUCTION_ERROR = 0x00004400, - VM_EXIT_REASON = 0x00004402, - VM_EXIT_INTR_INFO = 0x00004404, - VM_EXIT_INTR_ERROR_CODE = 0x00004406, - IDT_VECTORING_INFO_FIELD = 0x00004408, - IDT_VECTORING_ERROR_CODE = 0x0000440a, - VM_EXIT_INSTRUCTION_LEN = 0x0000440c, - VMX_INSTRUCTION_INFO = 0x0000440e, - GUEST_ES_LIMIT = 0x00004800, - GUEST_CS_LIMIT = 0x00004802, - GUEST_SS_LIMIT = 0x00004804, - GUEST_DS_LIMIT = 0x00004806, - GUEST_FS_LIMIT = 0x00004808, - GUEST_GS_LIMIT = 0x0000480a, - GUEST_LDTR_LIMIT = 0x0000480c, - GUEST_TR_LIMIT = 0x0000480e, - GUEST_GDTR_LIMIT = 0x00004810, - GUEST_IDTR_LIMIT = 0x00004812, - GUEST_ES_AR_BYTES = 0x00004814, - GUEST_CS_AR_BYTES = 0x00004816, - GUEST_SS_AR_BYTES = 0x00004818, - GUEST_DS_AR_BYTES = 0x0000481a, - GUEST_FS_AR_BYTES = 0x0000481c, - GUEST_GS_AR_BYTES = 0x0000481e, - GUEST_LDTR_AR_BYTES = 0x00004820, - GUEST_TR_AR_BYTES = 0x00004822, - GUEST_INTERRUPTIBILITY_INFO = 0x00004824, - GUEST_ACTIVITY_STATE = 0X00004826, - GUEST_SYSENTER_CS = 0x0000482A, - VMX_PREEMPTION_TIMER_VALUE = 0x0000482E, - HOST_IA32_SYSENTER_CS = 0x00004c00, - CR0_GUEST_HOST_MASK = 0x00006000, - CR4_GUEST_HOST_MASK = 0x00006002, - CR0_READ_SHADOW = 0x00006004, - CR4_READ_SHADOW = 0x00006006, - CR3_TARGET_VALUE0 = 0x00006008, - CR3_TARGET_VALUE1 = 0x0000600a, - CR3_TARGET_VALUE2 = 0x0000600c, - CR3_TARGET_VALUE3 = 0x0000600e, - EXIT_QUALIFICATION = 0x00006400, - GUEST_LINEAR_ADDRESS = 0x0000640a, - GUEST_CR0 = 0x00006800, - GUEST_CR3 = 0x00006802, - GUEST_CR4 = 0x00006804, - GUEST_ES_BASE = 0x00006806, - GUEST_CS_BASE = 0x00006808, - GUEST_SS_BASE = 0x0000680a, - GUEST_DS_BASE = 0x0000680c, - GUEST_FS_BASE = 0x0000680e, - GUEST_GS_BASE = 0x00006810, - GUEST_LDTR_BASE = 0x00006812, - GUEST_TR_BASE = 0x00006814, - GUEST_GDTR_BASE = 0x00006816, - GUEST_IDTR_BASE = 0x00006818, - GUEST_DR7 = 0x0000681a, - GUEST_RSP = 0x0000681c, - GUEST_RIP = 0x0000681e, - GUEST_RFLAGS = 0x00006820, - GUEST_PENDING_DBG_EXCEPTIONS = 0x00006822, - GUEST_SYSENTER_ESP = 0x00006824, - GUEST_SYSENTER_EIP = 0x00006826, - HOST_CR0 = 0x00006c00, - HOST_CR3 = 0x00006c02, - HOST_CR4 = 0x00006c04, - HOST_FS_BASE = 0x00006c06, - HOST_GS_BASE = 0x00006c08, - HOST_TR_BASE = 0x00006c0a, - HOST_GDTR_BASE = 0x00006c0c, - HOST_IDTR_BASE = 0x00006c0e, - HOST_IA32_SYSENTER_ESP = 0x00006c10, - HOST_IA32_SYSENTER_EIP = 0x00006c12, - HOST_RSP = 0x00006c14, - HOST_RIP = 0x00006c16, -}; - -struct vmx_msr_entry { - uint32_t index; - uint32_t reserved; - uint64_t value; -} __attribute__ ((aligned(16))); - -static inline int vmxon(uint64_t phys) -{ - uint8_t ret; - - __asm__ __volatile__ ("vmxon %[pa]; setna %[ret]" - : [ret]"=rm"(ret) - : [pa]"m"(phys) - : "cc", "memory"); - - return ret; -} - -static inline void vmxoff(void) -{ - __asm__ __volatile__("vmxoff"); -} - -static inline int vmclear(uint64_t vmcs_pa) -{ - uint8_t ret; - - __asm__ __volatile__ ("vmclear %[pa]; setna %[ret]" - : [ret]"=rm"(ret) - : [pa]"m"(vmcs_pa) - : "cc", "memory"); - - return ret; -} - -static inline int vmptrld(uint64_t vmcs_pa) -{ - uint8_t ret; - - __asm__ __volatile__ ("vmptrld %[pa]; setna %[ret]" - : [ret]"=rm"(ret) - : [pa]"m"(vmcs_pa) - : "cc", "memory"); - - return ret; -} - -static inline int vmptrst(uint64_t *value) -{ - uint64_t tmp; - uint8_t ret; - - __asm__ __volatile__("vmptrst %[value]; setna %[ret]" - : [value]"=m"(tmp), [ret]"=rm"(ret) - : : "cc", "memory"); - - *value = tmp; - return ret; -} - -/* - * A wrapper around vmptrst that ignores errors and returns zero if the - * vmptrst instruction fails. - */ -static inline uint64_t vmptrstz(void) -{ - uint64_t value = 0; - vmptrst(&value); - return value; -} - -/* - * No guest state (e.g. GPRs) is established by this vmlaunch. - */ -static inline int vmlaunch(void) -{ - int ret; - - __asm__ __volatile__("push %%rbp;" - "push %%rcx;" - "push %%rdx;" - "push %%rsi;" - "push %%rdi;" - "push $0;" - "vmwrite %%rsp, %[host_rsp];" - "lea 1f(%%rip), %%rax;" - "vmwrite %%rax, %[host_rip];" - "vmlaunch;" - "incq (%%rsp);" - "1: pop %%rax;" - "pop %%rdi;" - "pop %%rsi;" - "pop %%rdx;" - "pop %%rcx;" - "pop %%rbp;" - : [ret]"=&a"(ret) - : [host_rsp]"r"((uint64_t)HOST_RSP), - [host_rip]"r"((uint64_t)HOST_RIP) - : "memory", "cc", "rbx", "r8", "r9", "r10", - "r11", "r12", "r13", "r14", "r15"); - return ret; -} - -/* - * No guest state (e.g. GPRs) is established by this vmresume. - */ -static inline int vmresume(void) -{ - int ret; - - __asm__ __volatile__("push %%rbp;" - "push %%rcx;" - "push %%rdx;" - "push %%rsi;" - "push %%rdi;" - "push $0;" - "vmwrite %%rsp, %[host_rsp];" - "lea 1f(%%rip), %%rax;" - "vmwrite %%rax, %[host_rip];" - "vmresume;" - "incq (%%rsp);" - "1: pop %%rax;" - "pop %%rdi;" - "pop %%rsi;" - "pop %%rdx;" - "pop %%rcx;" - "pop %%rbp;" - : [ret]"=&a"(ret) - : [host_rsp]"r"((uint64_t)HOST_RSP), - [host_rip]"r"((uint64_t)HOST_RIP) - : "memory", "cc", "rbx", "r8", "r9", "r10", - "r11", "r12", "r13", "r14", "r15"); - return ret; -} - -static inline void vmcall(void) -{ - /* Currently, L1 destroys our GPRs during vmexits. */ - __asm__ __volatile__("push %%rbp; vmcall; pop %%rbp" : : : - "rax", "rbx", "rcx", "rdx", - "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", - "r13", "r14", "r15"); -} - -static inline int vmread(uint64_t encoding, uint64_t *value) -{ - uint64_t tmp; - uint8_t ret; - - __asm__ __volatile__("vmread %[encoding], %[value]; setna %[ret]" - : [value]"=rm"(tmp), [ret]"=rm"(ret) - : [encoding]"r"(encoding) - : "cc", "memory"); - - *value = tmp; - return ret; -} - -/* - * A wrapper around vmread that ignores errors and returns zero if the - * vmread instruction fails. - */ -static inline uint64_t vmreadz(uint64_t encoding) -{ - uint64_t value = 0; - vmread(encoding, &value); - return value; -} - -static inline int vmwrite(uint64_t encoding, uint64_t value) -{ - uint8_t ret; - - __asm__ __volatile__ ("vmwrite %[value], %[encoding]; setna %[ret]" - : [ret]"=rm"(ret) - : [value]"rm"(value), [encoding]"r"(encoding) - : "cc", "memory"); - - return ret; -} - -static inline uint32_t vmcs_revision(void) -{ - return rdmsr(MSR_IA32_VMX_BASIC); -} - -struct vmx_pages { - void *vmxon_hva; - uint64_t vmxon_gpa; - void *vmxon; - - void *vmcs_hva; - uint64_t vmcs_gpa; - void *vmcs; - - void *msr_hva; - uint64_t msr_gpa; - void *msr; - - void *shadow_vmcs_hva; - uint64_t shadow_vmcs_gpa; - void *shadow_vmcs; - - void *vmread_hva; - uint64_t vmread_gpa; - void *vmread; - - void *vmwrite_hva; - uint64_t vmwrite_gpa; - void *vmwrite; -}; - -struct vmx_pages *vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva); -bool prepare_for_vmx_operation(struct vmx_pages *vmx); -void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp); - -#endif /* !SELFTEST_KVM_VMX_H */ diff --git a/tools/testing/selftests/kvm/include/x86.h b/tools/testing/selftests/kvm/include/x86.h deleted file mode 100644 index 42c3596815b8..000000000000 --- a/tools/testing/selftests/kvm/include/x86.h +++ /dev/null @@ -1,1047 +0,0 @@ -/* - * tools/testing/selftests/kvm/include/x86.h - * - * Copyright (C) 2018, Google LLC. - * - * This work is licensed under the terms of the GNU GPL, version 2. - * - */ - -#ifndef SELFTEST_KVM_X86_H -#define SELFTEST_KVM_X86_H - -#include -#include - -#define X86_EFLAGS_FIXED (1u << 1) - -#define X86_CR4_VME (1ul << 0) -#define X86_CR4_PVI (1ul << 1) -#define X86_CR4_TSD (1ul << 2) -#define X86_CR4_DE (1ul << 3) -#define X86_CR4_PSE (1ul << 4) -#define X86_CR4_PAE (1ul << 5) -#define X86_CR4_MCE (1ul << 6) -#define X86_CR4_PGE (1ul << 7) -#define X86_CR4_PCE (1ul << 8) -#define X86_CR4_OSFXSR (1ul << 9) -#define X86_CR4_OSXMMEXCPT (1ul << 10) -#define X86_CR4_UMIP (1ul << 11) -#define X86_CR4_VMXE (1ul << 13) -#define X86_CR4_SMXE (1ul << 14) -#define X86_CR4_FSGSBASE (1ul << 16) -#define X86_CR4_PCIDE (1ul << 17) -#define X86_CR4_OSXSAVE (1ul << 18) -#define X86_CR4_SMEP (1ul << 20) -#define X86_CR4_SMAP (1ul << 21) -#define X86_CR4_PKE (1ul << 22) - -/* The enum values match the intruction encoding of each register */ -enum x86_register { - RAX = 0, - RCX, - RDX, - RBX, - RSP, - RBP, - RSI, - RDI, - R8, - R9, - R10, - R11, - R12, - R13, - R14, - R15, -}; - -struct desc64 { - uint16_t limit0; - uint16_t base0; - unsigned base1:8, s:1, type:4, dpl:2, p:1; - unsigned limit1:4, avl:1, l:1, db:1, g:1, base2:8; - uint32_t base3; - uint32_t zero1; -} __attribute__((packed)); - -struct desc_ptr { - uint16_t size; - uint64_t address; -} __attribute__((packed)); - -static inline uint64_t get_desc64_base(const struct desc64 *desc) -{ - return ((uint64_t)desc->base3 << 32) | - (desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24)); -} - -static inline uint64_t rdtsc(void) -{ - uint32_t eax, edx; - - /* - * The lfence is to wait (on Intel CPUs) until all previous - * instructions have been executed. - */ - __asm__ __volatile__("lfence; rdtsc" : "=a"(eax), "=d"(edx)); - return ((uint64_t)edx) << 32 | eax; -} - -static inline uint64_t rdtscp(uint32_t *aux) -{ - uint32_t eax, edx; - - __asm__ __volatile__("rdtscp" : "=a"(eax), "=d"(edx), "=c"(*aux)); - return ((uint64_t)edx) << 32 | eax; -} - -static inline uint64_t rdmsr(uint32_t msr) -{ - uint32_t a, d; - - __asm__ __volatile__("rdmsr" : "=a"(a), "=d"(d) : "c"(msr) : "memory"); - - return a | ((uint64_t) d << 32); -} - -static inline void wrmsr(uint32_t msr, uint64_t value) -{ - uint32_t a = value; - uint32_t d = value >> 32; - - __asm__ __volatile__("wrmsr" :: "a"(a), "d"(d), "c"(msr) : "memory"); -} - - -static inline uint16_t inw(uint16_t port) -{ - uint16_t tmp; - - __asm__ __volatile__("in %%dx, %%ax" - : /* output */ "=a" (tmp) - : /* input */ "d" (port)); - - return tmp; -} - -static inline uint16_t get_es(void) -{ - uint16_t es; - - __asm__ __volatile__("mov %%es, %[es]" - : /* output */ [es]"=rm"(es)); - return es; -} - -static inline uint16_t get_cs(void) -{ - uint16_t cs; - - __asm__ __volatile__("mov %%cs, %[cs]" - : /* output */ [cs]"=rm"(cs)); - return cs; -} - -static inline uint16_t get_ss(void) -{ - uint16_t ss; - - __asm__ __volatile__("mov %%ss, %[ss]" - : /* output */ [ss]"=rm"(ss)); - return ss; -} - -static inline uint16_t get_ds(void) -{ - uint16_t ds; - - __asm__ __volatile__("mov %%ds, %[ds]" - : /* output */ [ds]"=rm"(ds)); - return ds; -} - -static inline uint16_t get_fs(void) -{ - uint16_t fs; - - __asm__ __volatile__("mov %%fs, %[fs]" - : /* output */ [fs]"=rm"(fs)); - return fs; -} - -static inline uint16_t get_gs(void) -{ - uint16_t gs; - - __asm__ __volatile__("mov %%gs, %[gs]" - : /* output */ [gs]"=rm"(gs)); - return gs; -} - -static inline uint16_t get_tr(void) -{ - uint16_t tr; - - __asm__ __volatile__("str %[tr]" - : /* output */ [tr]"=rm"(tr)); - return tr; -} - -static inline uint64_t get_cr0(void) -{ - uint64_t cr0; - - __asm__ __volatile__("mov %%cr0, %[cr0]" - : /* output */ [cr0]"=r"(cr0)); - return cr0; -} - -static inline uint64_t get_cr3(void) -{ - uint64_t cr3; - - __asm__ __volatile__("mov %%cr3, %[cr3]" - : /* output */ [cr3]"=r"(cr3)); - return cr3; -} - -static inline uint64_t get_cr4(void) -{ - uint64_t cr4; - - __asm__ __volatile__("mov %%cr4, %[cr4]" - : /* output */ [cr4]"=r"(cr4)); - return cr4; -} - -static inline void set_cr4(uint64_t val) -{ - __asm__ __volatile__("mov %0, %%cr4" : : "r" (val) : "memory"); -} - -static inline uint64_t get_gdt_base(void) -{ - struct desc_ptr gdt; - __asm__ __volatile__("sgdt %[gdt]" - : /* output */ [gdt]"=m"(gdt)); - return gdt.address; -} - -static inline uint64_t get_idt_base(void) -{ - struct desc_ptr idt; - __asm__ __volatile__("sidt %[idt]" - : /* output */ [idt]"=m"(idt)); - return idt.address; -} - -#define SET_XMM(__var, __xmm) \ - asm volatile("movq %0, %%"#__xmm : : "r"(__var) : #__xmm) - -static inline void set_xmm(int n, unsigned long val) -{ - switch (n) { - case 0: - SET_XMM(val, xmm0); - break; - case 1: - SET_XMM(val, xmm1); - break; - case 2: - SET_XMM(val, xmm2); - break; - case 3: - SET_XMM(val, xmm3); - break; - case 4: - SET_XMM(val, xmm4); - break; - case 5: - SET_XMM(val, xmm5); - break; - case 6: - SET_XMM(val, xmm6); - break; - case 7: - SET_XMM(val, xmm7); - break; - } -} - -typedef unsigned long v1di __attribute__ ((vector_size (8))); -static inline unsigned long get_xmm(int n) -{ - assert(n >= 0 && n <= 7); - - register v1di xmm0 __asm__("%xmm0"); - register v1di xmm1 __asm__("%xmm1"); - register v1di xmm2 __asm__("%xmm2"); - register v1di xmm3 __asm__("%xmm3"); - register v1di xmm4 __asm__("%xmm4"); - register v1di xmm5 __asm__("%xmm5"); - register v1di xmm6 __asm__("%xmm6"); - register v1di xmm7 __asm__("%xmm7"); - switch (n) { - case 0: - return (unsigned long)xmm0; - case 1: - return (unsigned long)xmm1; - case 2: - return (unsigned long)xmm2; - case 3: - return (unsigned long)xmm3; - case 4: - return (unsigned long)xmm4; - case 5: - return (unsigned long)xmm5; - case 6: - return (unsigned long)xmm6; - case 7: - return (unsigned long)xmm7; - } - return 0; -} - -struct kvm_x86_state; -struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid); -void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *state); - -/* - * Basic CPU control in CR0 - */ -#define X86_CR0_PE (1UL<<0) /* Protection Enable */ -#define X86_CR0_MP (1UL<<1) /* Monitor Coprocessor */ -#define X86_CR0_EM (1UL<<2) /* Emulation */ -#define X86_CR0_TS (1UL<<3) /* Task Switched */ -#define X86_CR0_ET (1UL<<4) /* Extension Type */ -#define X86_CR0_NE (1UL<<5) /* Numeric Error */ -#define X86_CR0_WP (1UL<<16) /* Write Protect */ -#define X86_CR0_AM (1UL<<18) /* Alignment Mask */ -#define X86_CR0_NW (1UL<<29) /* Not Write-through */ -#define X86_CR0_CD (1UL<<30) /* Cache Disable */ -#define X86_CR0_PG (1UL<<31) /* Paging */ - -/* - * CPU model specific register (MSR) numbers. - */ - -/* x86-64 specific MSRs */ -#define MSR_EFER 0xc0000080 /* extended feature register */ -#define MSR_STAR 0xc0000081 /* legacy mode SYSCALL target */ -#define MSR_LSTAR 0xc0000082 /* long mode SYSCALL target */ -#define MSR_CSTAR 0xc0000083 /* compat mode SYSCALL target */ -#define MSR_SYSCALL_MASK 0xc0000084 /* EFLAGS mask for syscall */ -#define MSR_FS_BASE 0xc0000100 /* 64bit FS base */ -#define MSR_GS_BASE 0xc0000101 /* 64bit GS base */ -#define MSR_KERNEL_GS_BASE 0xc0000102 /* SwapGS GS shadow */ -#define MSR_TSC_AUX 0xc0000103 /* Auxiliary TSC */ - -/* EFER bits: */ -#define EFER_SCE (1<<0) /* SYSCALL/SYSRET */ -#define EFER_LME (1<<8) /* Long mode enable */ -#define EFER_LMA (1<<10) /* Long mode active (read-only) */ -#define EFER_NX (1<<11) /* No execute enable */ -#define EFER_SVME (1<<12) /* Enable virtualization */ -#define EFER_LMSLE (1<<13) /* Long Mode Segment Limit Enable */ -#define EFER_FFXSR (1<<14) /* Enable Fast FXSAVE/FXRSTOR */ - -/* Intel MSRs. Some also available on other CPUs */ - -#define MSR_PPIN_CTL 0x0000004e -#define MSR_PPIN 0x0000004f - -#define MSR_IA32_PERFCTR0 0x000000c1 -#define MSR_IA32_PERFCTR1 0x000000c2 -#define MSR_FSB_FREQ 0x000000cd -#define MSR_PLATFORM_INFO 0x000000ce -#define MSR_PLATFORM_INFO_CPUID_FAULT_BIT 31 -#define MSR_PLATFORM_INFO_CPUID_FAULT BIT_ULL(MSR_PLATFORM_INFO_CPUID_FAULT_BIT) - -#define MSR_PKG_CST_CONFIG_CONTROL 0x000000e2 -#define NHM_C3_AUTO_DEMOTE (1UL << 25) -#define NHM_C1_AUTO_DEMOTE (1UL << 26) -#define ATM_LNC_C6_AUTO_DEMOTE (1UL << 25) -#define SNB_C1_AUTO_UNDEMOTE (1UL << 27) -#define SNB_C3_AUTO_UNDEMOTE (1UL << 28) - -#define MSR_MTRRcap 0x000000fe -#define MSR_IA32_BBL_CR_CTL 0x00000119 -#define MSR_IA32_BBL_CR_CTL3 0x0000011e - -#define MSR_IA32_SYSENTER_CS 0x00000174 -#define MSR_IA32_SYSENTER_ESP 0x00000175 -#define MSR_IA32_SYSENTER_EIP 0x00000176 - -#define MSR_IA32_MCG_CAP 0x00000179 -#define MSR_IA32_MCG_STATUS 0x0000017a -#define MSR_IA32_MCG_CTL 0x0000017b -#define MSR_IA32_MCG_EXT_CTL 0x000004d0 - -#define MSR_OFFCORE_RSP_0 0x000001a6 -#define MSR_OFFCORE_RSP_1 0x000001a7 -#define MSR_TURBO_RATIO_LIMIT 0x000001ad -#define MSR_TURBO_RATIO_LIMIT1 0x000001ae -#define MSR_TURBO_RATIO_LIMIT2 0x000001af - -#define MSR_LBR_SELECT 0x000001c8 -#define MSR_LBR_TOS 0x000001c9 -#define MSR_LBR_NHM_FROM 0x00000680 -#define MSR_LBR_NHM_TO 0x000006c0 -#define MSR_LBR_CORE_FROM 0x00000040 -#define MSR_LBR_CORE_TO 0x00000060 - -#define MSR_LBR_INFO_0 0x00000dc0 /* ... 0xddf for _31 */ -#define LBR_INFO_MISPRED BIT_ULL(63) -#define LBR_INFO_IN_TX BIT_ULL(62) -#define LBR_INFO_ABORT BIT_ULL(61) -#define LBR_INFO_CYCLES 0xffff - -#define MSR_IA32_PEBS_ENABLE 0x000003f1 -#define MSR_IA32_DS_AREA 0x00000600 -#define MSR_IA32_PERF_CAPABILITIES 0x00000345 -#define MSR_PEBS_LD_LAT_THRESHOLD 0x000003f6 - -#define MSR_IA32_RTIT_CTL 0x00000570 -#define MSR_IA32_RTIT_STATUS 0x00000571 -#define MSR_IA32_RTIT_ADDR0_A 0x00000580 -#define MSR_IA32_RTIT_ADDR0_B 0x00000581 -#define MSR_IA32_RTIT_ADDR1_A 0x00000582 -#define MSR_IA32_RTIT_ADDR1_B 0x00000583 -#define MSR_IA32_RTIT_ADDR2_A 0x00000584 -#define MSR_IA32_RTIT_ADDR2_B 0x00000585 -#define MSR_IA32_RTIT_ADDR3_A 0x00000586 -#define MSR_IA32_RTIT_ADDR3_B 0x00000587 -#define MSR_IA32_RTIT_CR3_MATCH 0x00000572 -#define MSR_IA32_RTIT_OUTPUT_BASE 0x00000560 -#define MSR_IA32_RTIT_OUTPUT_MASK 0x00000561 - -#define MSR_MTRRfix64K_00000 0x00000250 -#define MSR_MTRRfix16K_80000 0x00000258 -#define MSR_MTRRfix16K_A0000 0x00000259 -#define MSR_MTRRfix4K_C0000 0x00000268 -#define MSR_MTRRfix4K_C8000 0x00000269 -#define MSR_MTRRfix4K_D0000 0x0000026a -#define MSR_MTRRfix4K_D8000 0x0000026b -#define MSR_MTRRfix4K_E0000 0x0000026c -#define MSR_MTRRfix4K_E8000 0x0000026d -#define MSR_MTRRfix4K_F0000 0x0000026e -#define MSR_MTRRfix4K_F8000 0x0000026f -#define MSR_MTRRdefType 0x000002ff - -#define MSR_IA32_CR_PAT 0x00000277 - -#define MSR_IA32_DEBUGCTLMSR 0x000001d9 -#define MSR_IA32_LASTBRANCHFROMIP 0x000001db -#define MSR_IA32_LASTBRANCHTOIP 0x000001dc -#define MSR_IA32_LASTINTFROMIP 0x000001dd -#define MSR_IA32_LASTINTTOIP 0x000001de - -/* DEBUGCTLMSR bits (others vary by model): */ -#define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */ -#define DEBUGCTLMSR_BTF_SHIFT 1 -#define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */ -#define DEBUGCTLMSR_TR (1UL << 6) -#define DEBUGCTLMSR_BTS (1UL << 7) -#define DEBUGCTLMSR_BTINT (1UL << 8) -#define DEBUGCTLMSR_BTS_OFF_OS (1UL << 9) -#define DEBUGCTLMSR_BTS_OFF_USR (1UL << 10) -#define DEBUGCTLMSR_FREEZE_LBRS_ON_PMI (1UL << 11) -#define DEBUGCTLMSR_FREEZE_IN_SMM_BIT 14 -#define DEBUGCTLMSR_FREEZE_IN_SMM (1UL << DEBUGCTLMSR_FREEZE_IN_SMM_BIT) - -#define MSR_PEBS_FRONTEND 0x000003f7 - -#define MSR_IA32_POWER_CTL 0x000001fc - -#define MSR_IA32_MC0_CTL 0x00000400 -#define MSR_IA32_MC0_STATUS 0x00000401 -#define MSR_IA32_MC0_ADDR 0x00000402 -#define MSR_IA32_MC0_MISC 0x00000403 - -/* C-state Residency Counters */ -#define MSR_PKG_C3_RESIDENCY 0x000003f8 -#define MSR_PKG_C6_RESIDENCY 0x000003f9 -#define MSR_ATOM_PKG_C6_RESIDENCY 0x000003fa -#define MSR_PKG_C7_RESIDENCY 0x000003fa -#define MSR_CORE_C3_RESIDENCY 0x000003fc -#define MSR_CORE_C6_RESIDENCY 0x000003fd -#define MSR_CORE_C7_RESIDENCY 0x000003fe -#define MSR_KNL_CORE_C6_RESIDENCY 0x000003ff -#define MSR_PKG_C2_RESIDENCY 0x0000060d -#define MSR_PKG_C8_RESIDENCY 0x00000630 -#define MSR_PKG_C9_RESIDENCY 0x00000631 -#define MSR_PKG_C10_RESIDENCY 0x00000632 - -/* Interrupt Response Limit */ -#define MSR_PKGC3_IRTL 0x0000060a -#define MSR_PKGC6_IRTL 0x0000060b -#define MSR_PKGC7_IRTL 0x0000060c -#define MSR_PKGC8_IRTL 0x00000633 -#define MSR_PKGC9_IRTL 0x00000634 -#define MSR_PKGC10_IRTL 0x00000635 - -/* Run Time Average Power Limiting (RAPL) Interface */ - -#define MSR_RAPL_POWER_UNIT 0x00000606 - -#define MSR_PKG_POWER_LIMIT 0x00000610 -#define MSR_PKG_ENERGY_STATUS 0x00000611 -#define MSR_PKG_PERF_STATUS 0x00000613 -#define MSR_PKG_POWER_INFO 0x00000614 - -#define MSR_DRAM_POWER_LIMIT 0x00000618 -#define MSR_DRAM_ENERGY_STATUS 0x00000619 -#define MSR_DRAM_PERF_STATUS 0x0000061b -#define MSR_DRAM_POWER_INFO 0x0000061c - -#define MSR_PP0_POWER_LIMIT 0x00000638 -#define MSR_PP0_ENERGY_STATUS 0x00000639 -#define MSR_PP0_POLICY 0x0000063a -#define MSR_PP0_PERF_STATUS 0x0000063b - -#define MSR_PP1_POWER_LIMIT 0x00000640 -#define MSR_PP1_ENERGY_STATUS 0x00000641 -#define MSR_PP1_POLICY 0x00000642 - -/* Config TDP MSRs */ -#define MSR_CONFIG_TDP_NOMINAL 0x00000648 -#define MSR_CONFIG_TDP_LEVEL_1 0x00000649 -#define MSR_CONFIG_TDP_LEVEL_2 0x0000064A -#define MSR_CONFIG_TDP_CONTROL 0x0000064B -#define MSR_TURBO_ACTIVATION_RATIO 0x0000064C - -#define MSR_PLATFORM_ENERGY_STATUS 0x0000064D - -#define MSR_PKG_WEIGHTED_CORE_C0_RES 0x00000658 -#define MSR_PKG_ANY_CORE_C0_RES 0x00000659 -#define MSR_PKG_ANY_GFXE_C0_RES 0x0000065A -#define MSR_PKG_BOTH_CORE_GFXE_C0_RES 0x0000065B - -#define MSR_CORE_C1_RES 0x00000660 -#define MSR_MODULE_C6_RES_MS 0x00000664 - -#define MSR_CC6_DEMOTION_POLICY_CONFIG 0x00000668 -#define MSR_MC6_DEMOTION_POLICY_CONFIG 0x00000669 - -#define MSR_ATOM_CORE_RATIOS 0x0000066a -#define MSR_ATOM_CORE_VIDS 0x0000066b -#define MSR_ATOM_CORE_TURBO_RATIOS 0x0000066c -#define MSR_ATOM_CORE_TURBO_VIDS 0x0000066d - - -#define MSR_CORE_PERF_LIMIT_REASONS 0x00000690 -#define MSR_GFX_PERF_LIMIT_REASONS 0x000006B0 -#define MSR_RING_PERF_LIMIT_REASONS 0x000006B1 - -/* Hardware P state interface */ -#define MSR_PPERF 0x0000064e -#define MSR_PERF_LIMIT_REASONS 0x0000064f -#define MSR_PM_ENABLE 0x00000770 -#define MSR_HWP_CAPABILITIES 0x00000771 -#define MSR_HWP_REQUEST_PKG 0x00000772 -#define MSR_HWP_INTERRUPT 0x00000773 -#define MSR_HWP_REQUEST 0x00000774 -#define MSR_HWP_STATUS 0x00000777 - -/* CPUID.6.EAX */ -#define HWP_BASE_BIT (1<<7) -#define HWP_NOTIFICATIONS_BIT (1<<8) -#define HWP_ACTIVITY_WINDOW_BIT (1<<9) -#define HWP_ENERGY_PERF_PREFERENCE_BIT (1<<10) -#define HWP_PACKAGE_LEVEL_REQUEST_BIT (1<<11) - -/* IA32_HWP_CAPABILITIES */ -#define HWP_HIGHEST_PERF(x) (((x) >> 0) & 0xff) -#define HWP_GUARANTEED_PERF(x) (((x) >> 8) & 0xff) -#define HWP_MOSTEFFICIENT_PERF(x) (((x) >> 16) & 0xff) -#define HWP_LOWEST_PERF(x) (((x) >> 24) & 0xff) - -/* IA32_HWP_REQUEST */ -#define HWP_MIN_PERF(x) (x & 0xff) -#define HWP_MAX_PERF(x) ((x & 0xff) << 8) -#define HWP_DESIRED_PERF(x) ((x & 0xff) << 16) -#define HWP_ENERGY_PERF_PREFERENCE(x) (((unsigned long long) x & 0xff) << 24) -#define HWP_EPP_PERFORMANCE 0x00 -#define HWP_EPP_BALANCE_PERFORMANCE 0x80 -#define HWP_EPP_BALANCE_POWERSAVE 0xC0 -#define HWP_EPP_POWERSAVE 0xFF -#define HWP_ACTIVITY_WINDOW(x) ((unsigned long long)(x & 0xff3) << 32) -#define HWP_PACKAGE_CONTROL(x) ((unsigned long long)(x & 0x1) << 42) - -/* IA32_HWP_STATUS */ -#define HWP_GUARANTEED_CHANGE(x) (x & 0x1) -#define HWP_EXCURSION_TO_MINIMUM(x) (x & 0x4) - -/* IA32_HWP_INTERRUPT */ -#define HWP_CHANGE_TO_GUARANTEED_INT(x) (x & 0x1) -#define HWP_EXCURSION_TO_MINIMUM_INT(x) (x & 0x2) - -#define MSR_AMD64_MC0_MASK 0xc0010044 - -#define MSR_IA32_MCx_CTL(x) (MSR_IA32_MC0_CTL + 4*(x)) -#define MSR_IA32_MCx_STATUS(x) (MSR_IA32_MC0_STATUS + 4*(x)) -#define MSR_IA32_MCx_ADDR(x) (MSR_IA32_MC0_ADDR + 4*(x)) -#define MSR_IA32_MCx_MISC(x) (MSR_IA32_MC0_MISC + 4*(x)) - -#define MSR_AMD64_MCx_MASK(x) (MSR_AMD64_MC0_MASK + (x)) - -/* These are consecutive and not in the normal 4er MCE bank block */ -#define MSR_IA32_MC0_CTL2 0x00000280 -#define MSR_IA32_MCx_CTL2(x) (MSR_IA32_MC0_CTL2 + (x)) - -#define MSR_P6_PERFCTR0 0x000000c1 -#define MSR_P6_PERFCTR1 0x000000c2 -#define MSR_P6_EVNTSEL0 0x00000186 -#define MSR_P6_EVNTSEL1 0x00000187 - -#define MSR_KNC_PERFCTR0 0x00000020 -#define MSR_KNC_PERFCTR1 0x00000021 -#define MSR_KNC_EVNTSEL0 0x00000028 -#define MSR_KNC_EVNTSEL1 0x00000029 - -/* Alternative perfctr range with full access. */ -#define MSR_IA32_PMC0 0x000004c1 - -/* AMD64 MSRs. Not complete. See the architecture manual for a more - complete list. */ - -#define MSR_AMD64_PATCH_LEVEL 0x0000008b -#define MSR_AMD64_TSC_RATIO 0xc0000104 -#define MSR_AMD64_NB_CFG 0xc001001f -#define MSR_AMD64_PATCH_LOADER 0xc0010020 -#define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140 -#define MSR_AMD64_OSVW_STATUS 0xc0010141 -#define MSR_AMD64_LS_CFG 0xc0011020 -#define MSR_AMD64_DC_CFG 0xc0011022 -#define MSR_AMD64_BU_CFG2 0xc001102a -#define MSR_AMD64_IBSFETCHCTL 0xc0011030 -#define MSR_AMD64_IBSFETCHLINAD 0xc0011031 -#define MSR_AMD64_IBSFETCHPHYSAD 0xc0011032 -#define MSR_AMD64_IBSFETCH_REG_COUNT 3 -#define MSR_AMD64_IBSFETCH_REG_MASK ((1UL< +#include + +#define X86_EFLAGS_FIXED (1u << 1) + +#define X86_CR4_VME (1ul << 0) +#define X86_CR4_PVI (1ul << 1) +#define X86_CR4_TSD (1ul << 2) +#define X86_CR4_DE (1ul << 3) +#define X86_CR4_PSE (1ul << 4) +#define X86_CR4_PAE (1ul << 5) +#define X86_CR4_MCE (1ul << 6) +#define X86_CR4_PGE (1ul << 7) +#define X86_CR4_PCE (1ul << 8) +#define X86_CR4_OSFXSR (1ul << 9) +#define X86_CR4_OSXMMEXCPT (1ul << 10) +#define X86_CR4_UMIP (1ul << 11) +#define X86_CR4_VMXE (1ul << 13) +#define X86_CR4_SMXE (1ul << 14) +#define X86_CR4_FSGSBASE (1ul << 16) +#define X86_CR4_PCIDE (1ul << 17) +#define X86_CR4_OSXSAVE (1ul << 18) +#define X86_CR4_SMEP (1ul << 20) +#define X86_CR4_SMAP (1ul << 21) +#define X86_CR4_PKE (1ul << 22) + +/* The enum values match the intruction encoding of each register */ +enum x86_register { + RAX = 0, + RCX, + RDX, + RBX, + RSP, + RBP, + RSI, + RDI, + R8, + R9, + R10, + R11, + R12, + R13, + R14, + R15, +}; + +struct desc64 { + uint16_t limit0; + uint16_t base0; + unsigned base1:8, s:1, type:4, dpl:2, p:1; + unsigned limit1:4, avl:1, l:1, db:1, g:1, base2:8; + uint32_t base3; + uint32_t zero1; +} __attribute__((packed)); + +struct desc_ptr { + uint16_t size; + uint64_t address; +} __attribute__((packed)); + +static inline uint64_t get_desc64_base(const struct desc64 *desc) +{ + return ((uint64_t)desc->base3 << 32) | + (desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24)); +} + +static inline uint64_t rdtsc(void) +{ + uint32_t eax, edx; + + /* + * The lfence is to wait (on Intel CPUs) until all previous + * instructions have been executed. + */ + __asm__ __volatile__("lfence; rdtsc" : "=a"(eax), "=d"(edx)); + return ((uint64_t)edx) << 32 | eax; +} + +static inline uint64_t rdtscp(uint32_t *aux) +{ + uint32_t eax, edx; + + __asm__ __volatile__("rdtscp" : "=a"(eax), "=d"(edx), "=c"(*aux)); + return ((uint64_t)edx) << 32 | eax; +} + +static inline uint64_t rdmsr(uint32_t msr) +{ + uint32_t a, d; + + __asm__ __volatile__("rdmsr" : "=a"(a), "=d"(d) : "c"(msr) : "memory"); + + return a | ((uint64_t) d << 32); +} + +static inline void wrmsr(uint32_t msr, uint64_t value) +{ + uint32_t a = value; + uint32_t d = value >> 32; + + __asm__ __volatile__("wrmsr" :: "a"(a), "d"(d), "c"(msr) : "memory"); +} + + +static inline uint16_t inw(uint16_t port) +{ + uint16_t tmp; + + __asm__ __volatile__("in %%dx, %%ax" + : /* output */ "=a" (tmp) + : /* input */ "d" (port)); + + return tmp; +} + +static inline uint16_t get_es(void) +{ + uint16_t es; + + __asm__ __volatile__("mov %%es, %[es]" + : /* output */ [es]"=rm"(es)); + return es; +} + +static inline uint16_t get_cs(void) +{ + uint16_t cs; + + __asm__ __volatile__("mov %%cs, %[cs]" + : /* output */ [cs]"=rm"(cs)); + return cs; +} + +static inline uint16_t get_ss(void) +{ + uint16_t ss; + + __asm__ __volatile__("mov %%ss, %[ss]" + : /* output */ [ss]"=rm"(ss)); + return ss; +} + +static inline uint16_t get_ds(void) +{ + uint16_t ds; + + __asm__ __volatile__("mov %%ds, %[ds]" + : /* output */ [ds]"=rm"(ds)); + return ds; +} + +static inline uint16_t get_fs(void) +{ + uint16_t fs; + + __asm__ __volatile__("mov %%fs, %[fs]" + : /* output */ [fs]"=rm"(fs)); + return fs; +} + +static inline uint16_t get_gs(void) +{ + uint16_t gs; + + __asm__ __volatile__("mov %%gs, %[gs]" + : /* output */ [gs]"=rm"(gs)); + return gs; +} + +static inline uint16_t get_tr(void) +{ + uint16_t tr; + + __asm__ __volatile__("str %[tr]" + : /* output */ [tr]"=rm"(tr)); + return tr; +} + +static inline uint64_t get_cr0(void) +{ + uint64_t cr0; + + __asm__ __volatile__("mov %%cr0, %[cr0]" + : /* output */ [cr0]"=r"(cr0)); + return cr0; +} + +static inline uint64_t get_cr3(void) +{ + uint64_t cr3; + + __asm__ __volatile__("mov %%cr3, %[cr3]" + : /* output */ [cr3]"=r"(cr3)); + return cr3; +} + +static inline uint64_t get_cr4(void) +{ + uint64_t cr4; + + __asm__ __volatile__("mov %%cr4, %[cr4]" + : /* output */ [cr4]"=r"(cr4)); + return cr4; +} + +static inline void set_cr4(uint64_t val) +{ + __asm__ __volatile__("mov %0, %%cr4" : : "r" (val) : "memory"); +} + +static inline uint64_t get_gdt_base(void) +{ + struct desc_ptr gdt; + __asm__ __volatile__("sgdt %[gdt]" + : /* output */ [gdt]"=m"(gdt)); + return gdt.address; +} + +static inline uint64_t get_idt_base(void) +{ + struct desc_ptr idt; + __asm__ __volatile__("sidt %[idt]" + : /* output */ [idt]"=m"(idt)); + return idt.address; +} + +#define SET_XMM(__var, __xmm) \ + asm volatile("movq %0, %%"#__xmm : : "r"(__var) : #__xmm) + +static inline void set_xmm(int n, unsigned long val) +{ + switch (n) { + case 0: + SET_XMM(val, xmm0); + break; + case 1: + SET_XMM(val, xmm1); + break; + case 2: + SET_XMM(val, xmm2); + break; + case 3: + SET_XMM(val, xmm3); + break; + case 4: + SET_XMM(val, xmm4); + break; + case 5: + SET_XMM(val, xmm5); + break; + case 6: + SET_XMM(val, xmm6); + break; + case 7: + SET_XMM(val, xmm7); + break; + } +} + +typedef unsigned long v1di __attribute__ ((vector_size (8))); +static inline unsigned long get_xmm(int n) +{ + assert(n >= 0 && n <= 7); + + register v1di xmm0 __asm__("%xmm0"); + register v1di xmm1 __asm__("%xmm1"); + register v1di xmm2 __asm__("%xmm2"); + register v1di xmm3 __asm__("%xmm3"); + register v1di xmm4 __asm__("%xmm4"); + register v1di xmm5 __asm__("%xmm5"); + register v1di xmm6 __asm__("%xmm6"); + register v1di xmm7 __asm__("%xmm7"); + switch (n) { + case 0: + return (unsigned long)xmm0; + case 1: + return (unsigned long)xmm1; + case 2: + return (unsigned long)xmm2; + case 3: + return (unsigned long)xmm3; + case 4: + return (unsigned long)xmm4; + case 5: + return (unsigned long)xmm5; + case 6: + return (unsigned long)xmm6; + case 7: + return (unsigned long)xmm7; + } + return 0; +} + +struct kvm_x86_state; +struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid); +void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *state); + +/* + * Basic CPU control in CR0 + */ +#define X86_CR0_PE (1UL<<0) /* Protection Enable */ +#define X86_CR0_MP (1UL<<1) /* Monitor Coprocessor */ +#define X86_CR0_EM (1UL<<2) /* Emulation */ +#define X86_CR0_TS (1UL<<3) /* Task Switched */ +#define X86_CR0_ET (1UL<<4) /* Extension Type */ +#define X86_CR0_NE (1UL<<5) /* Numeric Error */ +#define X86_CR0_WP (1UL<<16) /* Write Protect */ +#define X86_CR0_AM (1UL<<18) /* Alignment Mask */ +#define X86_CR0_NW (1UL<<29) /* Not Write-through */ +#define X86_CR0_CD (1UL<<30) /* Cache Disable */ +#define X86_CR0_PG (1UL<<31) /* Paging */ + +/* + * CPU model specific register (MSR) numbers. + */ + +/* x86-64 specific MSRs */ +#define MSR_EFER 0xc0000080 /* extended feature register */ +#define MSR_STAR 0xc0000081 /* legacy mode SYSCALL target */ +#define MSR_LSTAR 0xc0000082 /* long mode SYSCALL target */ +#define MSR_CSTAR 0xc0000083 /* compat mode SYSCALL target */ +#define MSR_SYSCALL_MASK 0xc0000084 /* EFLAGS mask for syscall */ +#define MSR_FS_BASE 0xc0000100 /* 64bit FS base */ +#define MSR_GS_BASE 0xc0000101 /* 64bit GS base */ +#define MSR_KERNEL_GS_BASE 0xc0000102 /* SwapGS GS shadow */ +#define MSR_TSC_AUX 0xc0000103 /* Auxiliary TSC */ + +/* EFER bits: */ +#define EFER_SCE (1<<0) /* SYSCALL/SYSRET */ +#define EFER_LME (1<<8) /* Long mode enable */ +#define EFER_LMA (1<<10) /* Long mode active (read-only) */ +#define EFER_NX (1<<11) /* No execute enable */ +#define EFER_SVME (1<<12) /* Enable virtualization */ +#define EFER_LMSLE (1<<13) /* Long Mode Segment Limit Enable */ +#define EFER_FFXSR (1<<14) /* Enable Fast FXSAVE/FXRSTOR */ + +/* Intel MSRs. Some also available on other CPUs */ + +#define MSR_PPIN_CTL 0x0000004e +#define MSR_PPIN 0x0000004f + +#define MSR_IA32_PERFCTR0 0x000000c1 +#define MSR_IA32_PERFCTR1 0x000000c2 +#define MSR_FSB_FREQ 0x000000cd +#define MSR_PLATFORM_INFO 0x000000ce +#define MSR_PLATFORM_INFO_CPUID_FAULT_BIT 31 +#define MSR_PLATFORM_INFO_CPUID_FAULT BIT_ULL(MSR_PLATFORM_INFO_CPUID_FAULT_BIT) + +#define MSR_PKG_CST_CONFIG_CONTROL 0x000000e2 +#define NHM_C3_AUTO_DEMOTE (1UL << 25) +#define NHM_C1_AUTO_DEMOTE (1UL << 26) +#define ATM_LNC_C6_AUTO_DEMOTE (1UL << 25) +#define SNB_C1_AUTO_UNDEMOTE (1UL << 27) +#define SNB_C3_AUTO_UNDEMOTE (1UL << 28) + +#define MSR_MTRRcap 0x000000fe +#define MSR_IA32_BBL_CR_CTL 0x00000119 +#define MSR_IA32_BBL_CR_CTL3 0x0000011e + +#define MSR_IA32_SYSENTER_CS 0x00000174 +#define MSR_IA32_SYSENTER_ESP 0x00000175 +#define MSR_IA32_SYSENTER_EIP 0x00000176 + +#define MSR_IA32_MCG_CAP 0x00000179 +#define MSR_IA32_MCG_STATUS 0x0000017a +#define MSR_IA32_MCG_CTL 0x0000017b +#define MSR_IA32_MCG_EXT_CTL 0x000004d0 + +#define MSR_OFFCORE_RSP_0 0x000001a6 +#define MSR_OFFCORE_RSP_1 0x000001a7 +#define MSR_TURBO_RATIO_LIMIT 0x000001ad +#define MSR_TURBO_RATIO_LIMIT1 0x000001ae +#define MSR_TURBO_RATIO_LIMIT2 0x000001af + +#define MSR_LBR_SELECT 0x000001c8 +#define MSR_LBR_TOS 0x000001c9 +#define MSR_LBR_NHM_FROM 0x00000680 +#define MSR_LBR_NHM_TO 0x000006c0 +#define MSR_LBR_CORE_FROM 0x00000040 +#define MSR_LBR_CORE_TO 0x00000060 + +#define MSR_LBR_INFO_0 0x00000dc0 /* ... 0xddf for _31 */ +#define LBR_INFO_MISPRED BIT_ULL(63) +#define LBR_INFO_IN_TX BIT_ULL(62) +#define LBR_INFO_ABORT BIT_ULL(61) +#define LBR_INFO_CYCLES 0xffff + +#define MSR_IA32_PEBS_ENABLE 0x000003f1 +#define MSR_IA32_DS_AREA 0x00000600 +#define MSR_IA32_PERF_CAPABILITIES 0x00000345 +#define MSR_PEBS_LD_LAT_THRESHOLD 0x000003f6 + +#define MSR_IA32_RTIT_CTL 0x00000570 +#define MSR_IA32_RTIT_STATUS 0x00000571 +#define MSR_IA32_RTIT_ADDR0_A 0x00000580 +#define MSR_IA32_RTIT_ADDR0_B 0x00000581 +#define MSR_IA32_RTIT_ADDR1_A 0x00000582 +#define MSR_IA32_RTIT_ADDR1_B 0x00000583 +#define MSR_IA32_RTIT_ADDR2_A 0x00000584 +#define MSR_IA32_RTIT_ADDR2_B 0x00000585 +#define MSR_IA32_RTIT_ADDR3_A 0x00000586 +#define MSR_IA32_RTIT_ADDR3_B 0x00000587 +#define MSR_IA32_RTIT_CR3_MATCH 0x00000572 +#define MSR_IA32_RTIT_OUTPUT_BASE 0x00000560 +#define MSR_IA32_RTIT_OUTPUT_MASK 0x00000561 + +#define MSR_MTRRfix64K_00000 0x00000250 +#define MSR_MTRRfix16K_80000 0x00000258 +#define MSR_MTRRfix16K_A0000 0x00000259 +#define MSR_MTRRfix4K_C0000 0x00000268 +#define MSR_MTRRfix4K_C8000 0x00000269 +#define MSR_MTRRfix4K_D0000 0x0000026a +#define MSR_MTRRfix4K_D8000 0x0000026b +#define MSR_MTRRfix4K_E0000 0x0000026c +#define MSR_MTRRfix4K_E8000 0x0000026d +#define MSR_MTRRfix4K_F0000 0x0000026e +#define MSR_MTRRfix4K_F8000 0x0000026f +#define MSR_MTRRdefType 0x000002ff + +#define MSR_IA32_CR_PAT 0x00000277 + +#define MSR_IA32_DEBUGCTLMSR 0x000001d9 +#define MSR_IA32_LASTBRANCHFROMIP 0x000001db +#define MSR_IA32_LASTBRANCHTOIP 0x000001dc +#define MSR_IA32_LASTINTFROMIP 0x000001dd +#define MSR_IA32_LASTINTTOIP 0x000001de + +/* DEBUGCTLMSR bits (others vary by model): */ +#define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */ +#define DEBUGCTLMSR_BTF_SHIFT 1 +#define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */ +#define DEBUGCTLMSR_TR (1UL << 6) +#define DEBUGCTLMSR_BTS (1UL << 7) +#define DEBUGCTLMSR_BTINT (1UL << 8) +#define DEBUGCTLMSR_BTS_OFF_OS (1UL << 9) +#define DEBUGCTLMSR_BTS_OFF_USR (1UL << 10) +#define DEBUGCTLMSR_FREEZE_LBRS_ON_PMI (1UL << 11) +#define DEBUGCTLMSR_FREEZE_IN_SMM_BIT 14 +#define DEBUGCTLMSR_FREEZE_IN_SMM (1UL << DEBUGCTLMSR_FREEZE_IN_SMM_BIT) + +#define MSR_PEBS_FRONTEND 0x000003f7 + +#define MSR_IA32_POWER_CTL 0x000001fc + +#define MSR_IA32_MC0_CTL 0x00000400 +#define MSR_IA32_MC0_STATUS 0x00000401 +#define MSR_IA32_MC0_ADDR 0x00000402 +#define MSR_IA32_MC0_MISC 0x00000403 + +/* C-state Residency Counters */ +#define MSR_PKG_C3_RESIDENCY 0x000003f8 +#define MSR_PKG_C6_RESIDENCY 0x000003f9 +#define MSR_ATOM_PKG_C6_RESIDENCY 0x000003fa +#define MSR_PKG_C7_RESIDENCY 0x000003fa +#define MSR_CORE_C3_RESIDENCY 0x000003fc +#define MSR_CORE_C6_RESIDENCY 0x000003fd +#define MSR_CORE_C7_RESIDENCY 0x000003fe +#define MSR_KNL_CORE_C6_RESIDENCY 0x000003ff +#define MSR_PKG_C2_RESIDENCY 0x0000060d +#define MSR_PKG_C8_RESIDENCY 0x00000630 +#define MSR_PKG_C9_RESIDENCY 0x00000631 +#define MSR_PKG_C10_RESIDENCY 0x00000632 + +/* Interrupt Response Limit */ +#define MSR_PKGC3_IRTL 0x0000060a +#define MSR_PKGC6_IRTL 0x0000060b +#define MSR_PKGC7_IRTL 0x0000060c +#define MSR_PKGC8_IRTL 0x00000633 +#define MSR_PKGC9_IRTL 0x00000634 +#define MSR_PKGC10_IRTL 0x00000635 + +/* Run Time Average Power Limiting (RAPL) Interface */ + +#define MSR_RAPL_POWER_UNIT 0x00000606 + +#define MSR_PKG_POWER_LIMIT 0x00000610 +#define MSR_PKG_ENERGY_STATUS 0x00000611 +#define MSR_PKG_PERF_STATUS 0x00000613 +#define MSR_PKG_POWER_INFO 0x00000614 + +#define MSR_DRAM_POWER_LIMIT 0x00000618 +#define MSR_DRAM_ENERGY_STATUS 0x00000619 +#define MSR_DRAM_PERF_STATUS 0x0000061b +#define MSR_DRAM_POWER_INFO 0x0000061c + +#define MSR_PP0_POWER_LIMIT 0x00000638 +#define MSR_PP0_ENERGY_STATUS 0x00000639 +#define MSR_PP0_POLICY 0x0000063a +#define MSR_PP0_PERF_STATUS 0x0000063b + +#define MSR_PP1_POWER_LIMIT 0x00000640 +#define MSR_PP1_ENERGY_STATUS 0x00000641 +#define MSR_PP1_POLICY 0x00000642 + +/* Config TDP MSRs */ +#define MSR_CONFIG_TDP_NOMINAL 0x00000648 +#define MSR_CONFIG_TDP_LEVEL_1 0x00000649 +#define MSR_CONFIG_TDP_LEVEL_2 0x0000064A +#define MSR_CONFIG_TDP_CONTROL 0x0000064B +#define MSR_TURBO_ACTIVATION_RATIO 0x0000064C + +#define MSR_PLATFORM_ENERGY_STATUS 0x0000064D + +#define MSR_PKG_WEIGHTED_CORE_C0_RES 0x00000658 +#define MSR_PKG_ANY_CORE_C0_RES 0x00000659 +#define MSR_PKG_ANY_GFXE_C0_RES 0x0000065A +#define MSR_PKG_BOTH_CORE_GFXE_C0_RES 0x0000065B + +#define MSR_CORE_C1_RES 0x00000660 +#define MSR_MODULE_C6_RES_MS 0x00000664 + +#define MSR_CC6_DEMOTION_POLICY_CONFIG 0x00000668 +#define MSR_MC6_DEMOTION_POLICY_CONFIG 0x00000669 + +#define MSR_ATOM_CORE_RATIOS 0x0000066a +#define MSR_ATOM_CORE_VIDS 0x0000066b +#define MSR_ATOM_CORE_TURBO_RATIOS 0x0000066c +#define MSR_ATOM_CORE_TURBO_VIDS 0x0000066d + + +#define MSR_CORE_PERF_LIMIT_REASONS 0x00000690 +#define MSR_GFX_PERF_LIMIT_REASONS 0x000006B0 +#define MSR_RING_PERF_LIMIT_REASONS 0x000006B1 + +/* Hardware P state interface */ +#define MSR_PPERF 0x0000064e +#define MSR_PERF_LIMIT_REASONS 0x0000064f +#define MSR_PM_ENABLE 0x00000770 +#define MSR_HWP_CAPABILITIES 0x00000771 +#define MSR_HWP_REQUEST_PKG 0x00000772 +#define MSR_HWP_INTERRUPT 0x00000773 +#define MSR_HWP_REQUEST 0x00000774 +#define MSR_HWP_STATUS 0x00000777 + +/* CPUID.6.EAX */ +#define HWP_BASE_BIT (1<<7) +#define HWP_NOTIFICATIONS_BIT (1<<8) +#define HWP_ACTIVITY_WINDOW_BIT (1<<9) +#define HWP_ENERGY_PERF_PREFERENCE_BIT (1<<10) +#define HWP_PACKAGE_LEVEL_REQUEST_BIT (1<<11) + +/* IA32_HWP_CAPABILITIES */ +#define HWP_HIGHEST_PERF(x) (((x) >> 0) & 0xff) +#define HWP_GUARANTEED_PERF(x) (((x) >> 8) & 0xff) +#define HWP_MOSTEFFICIENT_PERF(x) (((x) >> 16) & 0xff) +#define HWP_LOWEST_PERF(x) (((x) >> 24) & 0xff) + +/* IA32_HWP_REQUEST */ +#define HWP_MIN_PERF(x) (x & 0xff) +#define HWP_MAX_PERF(x) ((x & 0xff) << 8) +#define HWP_DESIRED_PERF(x) ((x & 0xff) << 16) +#define HWP_ENERGY_PERF_PREFERENCE(x) (((unsigned long long) x & 0xff) << 24) +#define HWP_EPP_PERFORMANCE 0x00 +#define HWP_EPP_BALANCE_PERFORMANCE 0x80 +#define HWP_EPP_BALANCE_POWERSAVE 0xC0 +#define HWP_EPP_POWERSAVE 0xFF +#define HWP_ACTIVITY_WINDOW(x) ((unsigned long long)(x & 0xff3) << 32) +#define HWP_PACKAGE_CONTROL(x) ((unsigned long long)(x & 0x1) << 42) + +/* IA32_HWP_STATUS */ +#define HWP_GUARANTEED_CHANGE(x) (x & 0x1) +#define HWP_EXCURSION_TO_MINIMUM(x) (x & 0x4) + +/* IA32_HWP_INTERRUPT */ +#define HWP_CHANGE_TO_GUARANTEED_INT(x) (x & 0x1) +#define HWP_EXCURSION_TO_MINIMUM_INT(x) (x & 0x2) + +#define MSR_AMD64_MC0_MASK 0xc0010044 + +#define MSR_IA32_MCx_CTL(x) (MSR_IA32_MC0_CTL + 4*(x)) +#define MSR_IA32_MCx_STATUS(x) (MSR_IA32_MC0_STATUS + 4*(x)) +#define MSR_IA32_MCx_ADDR(x) (MSR_IA32_MC0_ADDR + 4*(x)) +#define MSR_IA32_MCx_MISC(x) (MSR_IA32_MC0_MISC + 4*(x)) + +#define MSR_AMD64_MCx_MASK(x) (MSR_AMD64_MC0_MASK + (x)) + +/* These are consecutive and not in the normal 4er MCE bank block */ +#define MSR_IA32_MC0_CTL2 0x00000280 +#define MSR_IA32_MCx_CTL2(x) (MSR_IA32_MC0_CTL2 + (x)) + +#define MSR_P6_PERFCTR0 0x000000c1 +#define MSR_P6_PERFCTR1 0x000000c2 +#define MSR_P6_EVNTSEL0 0x00000186 +#define MSR_P6_EVNTSEL1 0x00000187 + +#define MSR_KNC_PERFCTR0 0x00000020 +#define MSR_KNC_PERFCTR1 0x00000021 +#define MSR_KNC_EVNTSEL0 0x00000028 +#define MSR_KNC_EVNTSEL1 0x00000029 + +/* Alternative perfctr range with full access. */ +#define MSR_IA32_PMC0 0x000004c1 + +/* AMD64 MSRs. Not complete. See the architecture manual for a more + complete list. */ + +#define MSR_AMD64_PATCH_LEVEL 0x0000008b +#define MSR_AMD64_TSC_RATIO 0xc0000104 +#define MSR_AMD64_NB_CFG 0xc001001f +#define MSR_AMD64_PATCH_LOADER 0xc0010020 +#define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140 +#define MSR_AMD64_OSVW_STATUS 0xc0010141 +#define MSR_AMD64_LS_CFG 0xc0011020 +#define MSR_AMD64_DC_CFG 0xc0011022 +#define MSR_AMD64_BU_CFG2 0xc001102a +#define MSR_AMD64_IBSFETCHCTL 0xc0011030 +#define MSR_AMD64_IBSFETCHLINAD 0xc0011031 +#define MSR_AMD64_IBSFETCHPHYSAD 0xc0011032 +#define MSR_AMD64_IBSFETCH_REG_COUNT 3 +#define MSR_AMD64_IBSFETCH_REG_MASK ((1UL< +#include "processor.h" + +#define CPUID_VMX_BIT 5 + +#define CPUID_VMX (1 << 5) + +/* + * Definitions of Primary Processor-Based VM-Execution Controls. + */ +#define CPU_BASED_VIRTUAL_INTR_PENDING 0x00000004 +#define CPU_BASED_USE_TSC_OFFSETING 0x00000008 +#define CPU_BASED_HLT_EXITING 0x00000080 +#define CPU_BASED_INVLPG_EXITING 0x00000200 +#define CPU_BASED_MWAIT_EXITING 0x00000400 +#define CPU_BASED_RDPMC_EXITING 0x00000800 +#define CPU_BASED_RDTSC_EXITING 0x00001000 +#define CPU_BASED_CR3_LOAD_EXITING 0x00008000 +#define CPU_BASED_CR3_STORE_EXITING 0x00010000 +#define CPU_BASED_CR8_LOAD_EXITING 0x00080000 +#define CPU_BASED_CR8_STORE_EXITING 0x00100000 +#define CPU_BASED_TPR_SHADOW 0x00200000 +#define CPU_BASED_VIRTUAL_NMI_PENDING 0x00400000 +#define CPU_BASED_MOV_DR_EXITING 0x00800000 +#define CPU_BASED_UNCOND_IO_EXITING 0x01000000 +#define CPU_BASED_USE_IO_BITMAPS 0x02000000 +#define CPU_BASED_MONITOR_TRAP 0x08000000 +#define CPU_BASED_USE_MSR_BITMAPS 0x10000000 +#define CPU_BASED_MONITOR_EXITING 0x20000000 +#define CPU_BASED_PAUSE_EXITING 0x40000000 +#define CPU_BASED_ACTIVATE_SECONDARY_CONTROLS 0x80000000 + +#define CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x0401e172 + +/* + * Definitions of Secondary Processor-Based VM-Execution Controls. + */ +#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001 +#define SECONDARY_EXEC_ENABLE_EPT 0x00000002 +#define SECONDARY_EXEC_DESC 0x00000004 +#define SECONDARY_EXEC_RDTSCP 0x00000008 +#define SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE 0x00000010 +#define SECONDARY_EXEC_ENABLE_VPID 0x00000020 +#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 +#define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080 +#define SECONDARY_EXEC_APIC_REGISTER_VIRT 0x00000100 +#define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY 0x00000200 +#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 +#define SECONDARY_EXEC_RDRAND_EXITING 0x00000800 +#define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000 +#define SECONDARY_EXEC_ENABLE_VMFUNC 0x00002000 +#define SECONDARY_EXEC_SHADOW_VMCS 0x00004000 +#define SECONDARY_EXEC_RDSEED_EXITING 0x00010000 +#define SECONDARY_EXEC_ENABLE_PML 0x00020000 +#define SECONDARY_EPT_VE 0x00040000 +#define SECONDARY_ENABLE_XSAV_RESTORE 0x00100000 +#define SECONDARY_EXEC_TSC_SCALING 0x02000000 + +#define PIN_BASED_EXT_INTR_MASK 0x00000001 +#define PIN_BASED_NMI_EXITING 0x00000008 +#define PIN_BASED_VIRTUAL_NMIS 0x00000020 +#define PIN_BASED_VMX_PREEMPTION_TIMER 0x00000040 +#define PIN_BASED_POSTED_INTR 0x00000080 + +#define PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x00000016 + +#define VM_EXIT_SAVE_DEBUG_CONTROLS 0x00000004 +#define VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200 +#define VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL 0x00001000 +#define VM_EXIT_ACK_INTR_ON_EXIT 0x00008000 +#define VM_EXIT_SAVE_IA32_PAT 0x00040000 +#define VM_EXIT_LOAD_IA32_PAT 0x00080000 +#define VM_EXIT_SAVE_IA32_EFER 0x00100000 +#define VM_EXIT_LOAD_IA32_EFER 0x00200000 +#define VM_EXIT_SAVE_VMX_PREEMPTION_TIMER 0x00400000 + +#define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR 0x00036dff + +#define VM_ENTRY_LOAD_DEBUG_CONTROLS 0x00000004 +#define VM_ENTRY_IA32E_MODE 0x00000200 +#define VM_ENTRY_SMM 0x00000400 +#define VM_ENTRY_DEACT_DUAL_MONITOR 0x00000800 +#define VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL 0x00002000 +#define VM_ENTRY_LOAD_IA32_PAT 0x00004000 +#define VM_ENTRY_LOAD_IA32_EFER 0x00008000 + +#define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR 0x000011ff + +#define VMX_MISC_PREEMPTION_TIMER_RATE_MASK 0x0000001f +#define VMX_MISC_SAVE_EFER_LMA 0x00000020 + +#define EXIT_REASON_FAILED_VMENTRY 0x80000000 +#define EXIT_REASON_EXCEPTION_NMI 0 +#define EXIT_REASON_EXTERNAL_INTERRUPT 1 +#define EXIT_REASON_TRIPLE_FAULT 2 +#define EXIT_REASON_PENDING_INTERRUPT 7 +#define EXIT_REASON_NMI_WINDOW 8 +#define EXIT_REASON_TASK_SWITCH 9 +#define EXIT_REASON_CPUID 10 +#define EXIT_REASON_HLT 12 +#define EXIT_REASON_INVD 13 +#define EXIT_REASON_INVLPG 14 +#define EXIT_REASON_RDPMC 15 +#define EXIT_REASON_RDTSC 16 +#define EXIT_REASON_VMCALL 18 +#define EXIT_REASON_VMCLEAR 19 +#define EXIT_REASON_VMLAUNCH 20 +#define EXIT_REASON_VMPTRLD 21 +#define EXIT_REASON_VMPTRST 22 +#define EXIT_REASON_VMREAD 23 +#define EXIT_REASON_VMRESUME 24 +#define EXIT_REASON_VMWRITE 25 +#define EXIT_REASON_VMOFF 26 +#define EXIT_REASON_VMON 27 +#define EXIT_REASON_CR_ACCESS 28 +#define EXIT_REASON_DR_ACCESS 29 +#define EXIT_REASON_IO_INSTRUCTION 30 +#define EXIT_REASON_MSR_READ 31 +#define EXIT_REASON_MSR_WRITE 32 +#define EXIT_REASON_INVALID_STATE 33 +#define EXIT_REASON_MWAIT_INSTRUCTION 36 +#define EXIT_REASON_MONITOR_INSTRUCTION 39 +#define EXIT_REASON_PAUSE_INSTRUCTION 40 +#define EXIT_REASON_MCE_DURING_VMENTRY 41 +#define EXIT_REASON_TPR_BELOW_THRESHOLD 43 +#define EXIT_REASON_APIC_ACCESS 44 +#define EXIT_REASON_EOI_INDUCED 45 +#define EXIT_REASON_EPT_VIOLATION 48 +#define EXIT_REASON_EPT_MISCONFIG 49 +#define EXIT_REASON_INVEPT 50 +#define EXIT_REASON_RDTSCP 51 +#define EXIT_REASON_PREEMPTION_TIMER 52 +#define EXIT_REASON_INVVPID 53 +#define EXIT_REASON_WBINVD 54 +#define EXIT_REASON_XSETBV 55 +#define EXIT_REASON_APIC_WRITE 56 +#define EXIT_REASON_INVPCID 58 +#define EXIT_REASON_PML_FULL 62 +#define EXIT_REASON_XSAVES 63 +#define EXIT_REASON_XRSTORS 64 +#define LAST_EXIT_REASON 64 + +enum vmcs_field { + VIRTUAL_PROCESSOR_ID = 0x00000000, + POSTED_INTR_NV = 0x00000002, + GUEST_ES_SELECTOR = 0x00000800, + GUEST_CS_SELECTOR = 0x00000802, + GUEST_SS_SELECTOR = 0x00000804, + GUEST_DS_SELECTOR = 0x00000806, + GUEST_FS_SELECTOR = 0x00000808, + GUEST_GS_SELECTOR = 0x0000080a, + GUEST_LDTR_SELECTOR = 0x0000080c, + GUEST_TR_SELECTOR = 0x0000080e, + GUEST_INTR_STATUS = 0x00000810, + GUEST_PML_INDEX = 0x00000812, + HOST_ES_SELECTOR = 0x00000c00, + HOST_CS_SELECTOR = 0x00000c02, + HOST_SS_SELECTOR = 0x00000c04, + HOST_DS_SELECTOR = 0x00000c06, + HOST_FS_SELECTOR = 0x00000c08, + HOST_GS_SELECTOR = 0x00000c0a, + HOST_TR_SELECTOR = 0x00000c0c, + IO_BITMAP_A = 0x00002000, + IO_BITMAP_A_HIGH = 0x00002001, + IO_BITMAP_B = 0x00002002, + IO_BITMAP_B_HIGH = 0x00002003, + MSR_BITMAP = 0x00002004, + MSR_BITMAP_HIGH = 0x00002005, + VM_EXIT_MSR_STORE_ADDR = 0x00002006, + VM_EXIT_MSR_STORE_ADDR_HIGH = 0x00002007, + VM_EXIT_MSR_LOAD_ADDR = 0x00002008, + VM_EXIT_MSR_LOAD_ADDR_HIGH = 0x00002009, + VM_ENTRY_MSR_LOAD_ADDR = 0x0000200a, + VM_ENTRY_MSR_LOAD_ADDR_HIGH = 0x0000200b, + PML_ADDRESS = 0x0000200e, + PML_ADDRESS_HIGH = 0x0000200f, + TSC_OFFSET = 0x00002010, + TSC_OFFSET_HIGH = 0x00002011, + VIRTUAL_APIC_PAGE_ADDR = 0x00002012, + VIRTUAL_APIC_PAGE_ADDR_HIGH = 0x00002013, + APIC_ACCESS_ADDR = 0x00002014, + APIC_ACCESS_ADDR_HIGH = 0x00002015, + POSTED_INTR_DESC_ADDR = 0x00002016, + POSTED_INTR_DESC_ADDR_HIGH = 0x00002017, + EPT_POINTER = 0x0000201a, + EPT_POINTER_HIGH = 0x0000201b, + EOI_EXIT_BITMAP0 = 0x0000201c, + EOI_EXIT_BITMAP0_HIGH = 0x0000201d, + EOI_EXIT_BITMAP1 = 0x0000201e, + EOI_EXIT_BITMAP1_HIGH = 0x0000201f, + EOI_EXIT_BITMAP2 = 0x00002020, + EOI_EXIT_BITMAP2_HIGH = 0x00002021, + EOI_EXIT_BITMAP3 = 0x00002022, + EOI_EXIT_BITMAP3_HIGH = 0x00002023, + VMREAD_BITMAP = 0x00002026, + VMREAD_BITMAP_HIGH = 0x00002027, + VMWRITE_BITMAP = 0x00002028, + VMWRITE_BITMAP_HIGH = 0x00002029, + XSS_EXIT_BITMAP = 0x0000202C, + XSS_EXIT_BITMAP_HIGH = 0x0000202D, + TSC_MULTIPLIER = 0x00002032, + TSC_MULTIPLIER_HIGH = 0x00002033, + GUEST_PHYSICAL_ADDRESS = 0x00002400, + GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401, + VMCS_LINK_POINTER = 0x00002800, + VMCS_LINK_POINTER_HIGH = 0x00002801, + GUEST_IA32_DEBUGCTL = 0x00002802, + GUEST_IA32_DEBUGCTL_HIGH = 0x00002803, + GUEST_IA32_PAT = 0x00002804, + GUEST_IA32_PAT_HIGH = 0x00002805, + GUEST_IA32_EFER = 0x00002806, + GUEST_IA32_EFER_HIGH = 0x00002807, + GUEST_IA32_PERF_GLOBAL_CTRL = 0x00002808, + GUEST_IA32_PERF_GLOBAL_CTRL_HIGH= 0x00002809, + GUEST_PDPTR0 = 0x0000280a, + GUEST_PDPTR0_HIGH = 0x0000280b, + GUEST_PDPTR1 = 0x0000280c, + GUEST_PDPTR1_HIGH = 0x0000280d, + GUEST_PDPTR2 = 0x0000280e, + GUEST_PDPTR2_HIGH = 0x0000280f, + GUEST_PDPTR3 = 0x00002810, + GUEST_PDPTR3_HIGH = 0x00002811, + GUEST_BNDCFGS = 0x00002812, + GUEST_BNDCFGS_HIGH = 0x00002813, + HOST_IA32_PAT = 0x00002c00, + HOST_IA32_PAT_HIGH = 0x00002c01, + HOST_IA32_EFER = 0x00002c02, + HOST_IA32_EFER_HIGH = 0x00002c03, + HOST_IA32_PERF_GLOBAL_CTRL = 0x00002c04, + HOST_IA32_PERF_GLOBAL_CTRL_HIGH = 0x00002c05, + PIN_BASED_VM_EXEC_CONTROL = 0x00004000, + CPU_BASED_VM_EXEC_CONTROL = 0x00004002, + EXCEPTION_BITMAP = 0x00004004, + PAGE_FAULT_ERROR_CODE_MASK = 0x00004006, + PAGE_FAULT_ERROR_CODE_MATCH = 0x00004008, + CR3_TARGET_COUNT = 0x0000400a, + VM_EXIT_CONTROLS = 0x0000400c, + VM_EXIT_MSR_STORE_COUNT = 0x0000400e, + VM_EXIT_MSR_LOAD_COUNT = 0x00004010, + VM_ENTRY_CONTROLS = 0x00004012, + VM_ENTRY_MSR_LOAD_COUNT = 0x00004014, + VM_ENTRY_INTR_INFO_FIELD = 0x00004016, + VM_ENTRY_EXCEPTION_ERROR_CODE = 0x00004018, + VM_ENTRY_INSTRUCTION_LEN = 0x0000401a, + TPR_THRESHOLD = 0x0000401c, + SECONDARY_VM_EXEC_CONTROL = 0x0000401e, + PLE_GAP = 0x00004020, + PLE_WINDOW = 0x00004022, + VM_INSTRUCTION_ERROR = 0x00004400, + VM_EXIT_REASON = 0x00004402, + VM_EXIT_INTR_INFO = 0x00004404, + VM_EXIT_INTR_ERROR_CODE = 0x00004406, + IDT_VECTORING_INFO_FIELD = 0x00004408, + IDT_VECTORING_ERROR_CODE = 0x0000440a, + VM_EXIT_INSTRUCTION_LEN = 0x0000440c, + VMX_INSTRUCTION_INFO = 0x0000440e, + GUEST_ES_LIMIT = 0x00004800, + GUEST_CS_LIMIT = 0x00004802, + GUEST_SS_LIMIT = 0x00004804, + GUEST_DS_LIMIT = 0x00004806, + GUEST_FS_LIMIT = 0x00004808, + GUEST_GS_LIMIT = 0x0000480a, + GUEST_LDTR_LIMIT = 0x0000480c, + GUEST_TR_LIMIT = 0x0000480e, + GUEST_GDTR_LIMIT = 0x00004810, + GUEST_IDTR_LIMIT = 0x00004812, + GUEST_ES_AR_BYTES = 0x00004814, + GUEST_CS_AR_BYTES = 0x00004816, + GUEST_SS_AR_BYTES = 0x00004818, + GUEST_DS_AR_BYTES = 0x0000481a, + GUEST_FS_AR_BYTES = 0x0000481c, + GUEST_GS_AR_BYTES = 0x0000481e, + GUEST_LDTR_AR_BYTES = 0x00004820, + GUEST_TR_AR_BYTES = 0x00004822, + GUEST_INTERRUPTIBILITY_INFO = 0x00004824, + GUEST_ACTIVITY_STATE = 0X00004826, + GUEST_SYSENTER_CS = 0x0000482A, + VMX_PREEMPTION_TIMER_VALUE = 0x0000482E, + HOST_IA32_SYSENTER_CS = 0x00004c00, + CR0_GUEST_HOST_MASK = 0x00006000, + CR4_GUEST_HOST_MASK = 0x00006002, + CR0_READ_SHADOW = 0x00006004, + CR4_READ_SHADOW = 0x00006006, + CR3_TARGET_VALUE0 = 0x00006008, + CR3_TARGET_VALUE1 = 0x0000600a, + CR3_TARGET_VALUE2 = 0x0000600c, + CR3_TARGET_VALUE3 = 0x0000600e, + EXIT_QUALIFICATION = 0x00006400, + GUEST_LINEAR_ADDRESS = 0x0000640a, + GUEST_CR0 = 0x00006800, + GUEST_CR3 = 0x00006802, + GUEST_CR4 = 0x00006804, + GUEST_ES_BASE = 0x00006806, + GUEST_CS_BASE = 0x00006808, + GUEST_SS_BASE = 0x0000680a, + GUEST_DS_BASE = 0x0000680c, + GUEST_FS_BASE = 0x0000680e, + GUEST_GS_BASE = 0x00006810, + GUEST_LDTR_BASE = 0x00006812, + GUEST_TR_BASE = 0x00006814, + GUEST_GDTR_BASE = 0x00006816, + GUEST_IDTR_BASE = 0x00006818, + GUEST_DR7 = 0x0000681a, + GUEST_RSP = 0x0000681c, + GUEST_RIP = 0x0000681e, + GUEST_RFLAGS = 0x00006820, + GUEST_PENDING_DBG_EXCEPTIONS = 0x00006822, + GUEST_SYSENTER_ESP = 0x00006824, + GUEST_SYSENTER_EIP = 0x00006826, + HOST_CR0 = 0x00006c00, + HOST_CR3 = 0x00006c02, + HOST_CR4 = 0x00006c04, + HOST_FS_BASE = 0x00006c06, + HOST_GS_BASE = 0x00006c08, + HOST_TR_BASE = 0x00006c0a, + HOST_GDTR_BASE = 0x00006c0c, + HOST_IDTR_BASE = 0x00006c0e, + HOST_IA32_SYSENTER_ESP = 0x00006c10, + HOST_IA32_SYSENTER_EIP = 0x00006c12, + HOST_RSP = 0x00006c14, + HOST_RIP = 0x00006c16, +}; + +struct vmx_msr_entry { + uint32_t index; + uint32_t reserved; + uint64_t value; +} __attribute__ ((aligned(16))); + +static inline int vmxon(uint64_t phys) +{ + uint8_t ret; + + __asm__ __volatile__ ("vmxon %[pa]; setna %[ret]" + : [ret]"=rm"(ret) + : [pa]"m"(phys) + : "cc", "memory"); + + return ret; +} + +static inline void vmxoff(void) +{ + __asm__ __volatile__("vmxoff"); +} + +static inline int vmclear(uint64_t vmcs_pa) +{ + uint8_t ret; + + __asm__ __volatile__ ("vmclear %[pa]; setna %[ret]" + : [ret]"=rm"(ret) + : [pa]"m"(vmcs_pa) + : "cc", "memory"); + + return ret; +} + +static inline int vmptrld(uint64_t vmcs_pa) +{ + uint8_t ret; + + __asm__ __volatile__ ("vmptrld %[pa]; setna %[ret]" + : [ret]"=rm"(ret) + : [pa]"m"(vmcs_pa) + : "cc", "memory"); + + return ret; +} + +static inline int vmptrst(uint64_t *value) +{ + uint64_t tmp; + uint8_t ret; + + __asm__ __volatile__("vmptrst %[value]; setna %[ret]" + : [value]"=m"(tmp), [ret]"=rm"(ret) + : : "cc", "memory"); + + *value = tmp; + return ret; +} + +/* + * A wrapper around vmptrst that ignores errors and returns zero if the + * vmptrst instruction fails. + */ +static inline uint64_t vmptrstz(void) +{ + uint64_t value = 0; + vmptrst(&value); + return value; +} + +/* + * No guest state (e.g. GPRs) is established by this vmlaunch. + */ +static inline int vmlaunch(void) +{ + int ret; + + __asm__ __volatile__("push %%rbp;" + "push %%rcx;" + "push %%rdx;" + "push %%rsi;" + "push %%rdi;" + "push $0;" + "vmwrite %%rsp, %[host_rsp];" + "lea 1f(%%rip), %%rax;" + "vmwrite %%rax, %[host_rip];" + "vmlaunch;" + "incq (%%rsp);" + "1: pop %%rax;" + "pop %%rdi;" + "pop %%rsi;" + "pop %%rdx;" + "pop %%rcx;" + "pop %%rbp;" + : [ret]"=&a"(ret) + : [host_rsp]"r"((uint64_t)HOST_RSP), + [host_rip]"r"((uint64_t)HOST_RIP) + : "memory", "cc", "rbx", "r8", "r9", "r10", + "r11", "r12", "r13", "r14", "r15"); + return ret; +} + +/* + * No guest state (e.g. GPRs) is established by this vmresume. + */ +static inline int vmresume(void) +{ + int ret; + + __asm__ __volatile__("push %%rbp;" + "push %%rcx;" + "push %%rdx;" + "push %%rsi;" + "push %%rdi;" + "push $0;" + "vmwrite %%rsp, %[host_rsp];" + "lea 1f(%%rip), %%rax;" + "vmwrite %%rax, %[host_rip];" + "vmresume;" + "incq (%%rsp);" + "1: pop %%rax;" + "pop %%rdi;" + "pop %%rsi;" + "pop %%rdx;" + "pop %%rcx;" + "pop %%rbp;" + : [ret]"=&a"(ret) + : [host_rsp]"r"((uint64_t)HOST_RSP), + [host_rip]"r"((uint64_t)HOST_RIP) + : "memory", "cc", "rbx", "r8", "r9", "r10", + "r11", "r12", "r13", "r14", "r15"); + return ret; +} + +static inline void vmcall(void) +{ + /* Currently, L1 destroys our GPRs during vmexits. */ + __asm__ __volatile__("push %%rbp; vmcall; pop %%rbp" : : : + "rax", "rbx", "rcx", "rdx", + "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", + "r13", "r14", "r15"); +} + +static inline int vmread(uint64_t encoding, uint64_t *value) +{ + uint64_t tmp; + uint8_t ret; + + __asm__ __volatile__("vmread %[encoding], %[value]; setna %[ret]" + : [value]"=rm"(tmp), [ret]"=rm"(ret) + : [encoding]"r"(encoding) + : "cc", "memory"); + + *value = tmp; + return ret; +} + +/* + * A wrapper around vmread that ignores errors and returns zero if the + * vmread instruction fails. + */ +static inline uint64_t vmreadz(uint64_t encoding) +{ + uint64_t value = 0; + vmread(encoding, &value); + return value; +} + +static inline int vmwrite(uint64_t encoding, uint64_t value) +{ + uint8_t ret; + + __asm__ __volatile__ ("vmwrite %[value], %[encoding]; setna %[ret]" + : [ret]"=rm"(ret) + : [value]"rm"(value), [encoding]"r"(encoding) + : "cc", "memory"); + + return ret; +} + +static inline uint32_t vmcs_revision(void) +{ + return rdmsr(MSR_IA32_VMX_BASIC); +} + +struct vmx_pages { + void *vmxon_hva; + uint64_t vmxon_gpa; + void *vmxon; + + void *vmcs_hva; + uint64_t vmcs_gpa; + void *vmcs; + + void *msr_hva; + uint64_t msr_gpa; + void *msr; + + void *shadow_vmcs_hva; + uint64_t shadow_vmcs_gpa; + void *shadow_vmcs; + + void *vmread_hva; + uint64_t vmread_gpa; + void *vmread; + + void *vmwrite_hva; + uint64_t vmwrite_gpa; + void *vmwrite; +}; + +struct vmx_pages *vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva); +bool prepare_for_vmx_operation(struct vmx_pages *vmx); +void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp); + +#endif /* SELFTEST_KVM_VMX_H */ diff --git a/tools/testing/selftests/kvm/lib/assert.c b/tools/testing/selftests/kvm/lib/assert.c index cd01144d27c8..6398efe67885 100644 --- a/tools/testing/selftests/kvm/lib/assert.c +++ b/tools/testing/selftests/kvm/lib/assert.c @@ -13,7 +13,7 @@ #include #include -#include "../../kselftest.h" +#include "kselftest.h" /* Dumps the current stack trace to stderr. */ static void __attribute__((noinline)) test_dump_stack(void); diff --git a/tools/testing/selftests/kvm/lib/kvm_util_internal.h b/tools/testing/selftests/kvm/lib/kvm_util_internal.h index 8fa7c9f7567a..36367be0fe26 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util_internal.h +++ b/tools/testing/selftests/kvm/lib/kvm_util_internal.h @@ -1,13 +1,13 @@ /* - * tools/testing/selftests/kvm/lib/kvm_util.c + * tools/testing/selftests/kvm/lib/kvm_util_internal.h * * Copyright (C) 2018, Google LLC. * * This work is licensed under the terms of the GNU GPL, version 2. */ -#ifndef KVM_UTIL_INTERNAL_H -#define KVM_UTIL_INTERNAL_H 1 +#ifndef SELFTEST_KVM_UTIL_INTERNAL_H +#define SELFTEST_KVM_UTIL_INTERNAL_H #include "sparsebit.h" @@ -70,4 +70,4 @@ void regs_dump(FILE *stream, struct kvm_regs *regs, void sregs_dump(FILE *stream, struct kvm_sregs *sregs, uint8_t indent); -#endif +#endif /* SELFTEST_KVM_UTIL_INTERNAL_H */ diff --git a/tools/testing/selftests/kvm/lib/vmx.c b/tools/testing/selftests/kvm/lib/vmx.c deleted file mode 100644 index b987c3c970eb..000000000000 --- a/tools/testing/selftests/kvm/lib/vmx.c +++ /dev/null @@ -1,283 +0,0 @@ -/* - * tools/testing/selftests/kvm/lib/x86.c - * - * Copyright (C) 2018, Google LLC. - * - * This work is licensed under the terms of the GNU GPL, version 2. - */ - -#define _GNU_SOURCE /* for program_invocation_name */ - -#include "test_util.h" -#include "kvm_util.h" -#include "x86.h" -#include "vmx.h" - -/* Allocate memory regions for nested VMX tests. - * - * Input Args: - * vm - The VM to allocate guest-virtual addresses in. - * - * Output Args: - * p_vmx_gva - The guest virtual address for the struct vmx_pages. - * - * Return: - * Pointer to structure with the addresses of the VMX areas. - */ -struct vmx_pages * -vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva) -{ - vm_vaddr_t vmx_gva = vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0); - struct vmx_pages *vmx = addr_gva2hva(vm, vmx_gva); - - /* Setup of a region of guest memory for the vmxon region. */ - vmx->vmxon = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0); - vmx->vmxon_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmxon); - vmx->vmxon_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmxon); - - /* Setup of a region of guest memory for a vmcs. */ - vmx->vmcs = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0); - vmx->vmcs_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmcs); - vmx->vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmcs); - - /* Setup of a region of guest memory for the MSR bitmap. */ - vmx->msr = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0); - vmx->msr_hva = addr_gva2hva(vm, (uintptr_t)vmx->msr); - vmx->msr_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->msr); - memset(vmx->msr_hva, 0, getpagesize()); - - /* Setup of a region of guest memory for the shadow VMCS. */ - vmx->shadow_vmcs = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0); - vmx->shadow_vmcs_hva = addr_gva2hva(vm, (uintptr_t)vmx->shadow_vmcs); - vmx->shadow_vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->shadow_vmcs); - - /* Setup of a region of guest memory for the VMREAD and VMWRITE bitmaps. */ - vmx->vmread = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0); - vmx->vmread_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmread); - vmx->vmread_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmread); - memset(vmx->vmread_hva, 0, getpagesize()); - - vmx->vmwrite = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0); - vmx->vmwrite_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmwrite); - vmx->vmwrite_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmwrite); - memset(vmx->vmwrite_hva, 0, getpagesize()); - - *p_vmx_gva = vmx_gva; - return vmx; -} - -bool prepare_for_vmx_operation(struct vmx_pages *vmx) -{ - uint64_t feature_control; - uint64_t required; - unsigned long cr0; - unsigned long cr4; - - /* - * Ensure bits in CR0 and CR4 are valid in VMX operation: - * - Bit X is 1 in _FIXED0: bit X is fixed to 1 in CRx. - * - Bit X is 0 in _FIXED1: bit X is fixed to 0 in CRx. - */ - __asm__ __volatile__("mov %%cr0, %0" : "=r"(cr0) : : "memory"); - cr0 &= rdmsr(MSR_IA32_VMX_CR0_FIXED1); - cr0 |= rdmsr(MSR_IA32_VMX_CR0_FIXED0); - __asm__ __volatile__("mov %0, %%cr0" : : "r"(cr0) : "memory"); - - __asm__ __volatile__("mov %%cr4, %0" : "=r"(cr4) : : "memory"); - cr4 &= rdmsr(MSR_IA32_VMX_CR4_FIXED1); - cr4 |= rdmsr(MSR_IA32_VMX_CR4_FIXED0); - /* Enable VMX operation */ - cr4 |= X86_CR4_VMXE; - __asm__ __volatile__("mov %0, %%cr4" : : "r"(cr4) : "memory"); - - /* - * Configure IA32_FEATURE_CONTROL MSR to allow VMXON: - * Bit 0: Lock bit. If clear, VMXON causes a #GP. - * Bit 2: Enables VMXON outside of SMX operation. If clear, VMXON - * outside of SMX causes a #GP. - */ - required = FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; - required |= FEATURE_CONTROL_LOCKED; - feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL); - if ((feature_control & required) != required) - wrmsr(MSR_IA32_FEATURE_CONTROL, feature_control | required); - - /* Enter VMX root operation. */ - *(uint32_t *)(vmx->vmxon) = vmcs_revision(); - if (vmxon(vmx->vmxon_gpa)) - return false; - - /* Load a VMCS. */ - *(uint32_t *)(vmx->vmcs) = vmcs_revision(); - if (vmclear(vmx->vmcs_gpa)) - return false; - - if (vmptrld(vmx->vmcs_gpa)) - return false; - - /* Setup shadow VMCS, do not load it yet. */ - *(uint32_t *)(vmx->shadow_vmcs) = vmcs_revision() | 0x80000000ul; - if (vmclear(vmx->shadow_vmcs_gpa)) - return false; - - return true; -} - -/* - * Initialize the control fields to the most basic settings possible. - */ -static inline void init_vmcs_control_fields(struct vmx_pages *vmx) -{ - vmwrite(VIRTUAL_PROCESSOR_ID, 0); - vmwrite(POSTED_INTR_NV, 0); - - vmwrite(PIN_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_TRUE_PINBASED_CTLS)); - if (!vmwrite(SECONDARY_VM_EXEC_CONTROL, 0)) - vmwrite(CPU_BASED_VM_EXEC_CONTROL, - rdmsr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS) | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS); - else - vmwrite(CPU_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS)); - vmwrite(EXCEPTION_BITMAP, 0); - vmwrite(PAGE_FAULT_ERROR_CODE_MASK, 0); - vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, -1); /* Never match */ - vmwrite(CR3_TARGET_COUNT, 0); - vmwrite(VM_EXIT_CONTROLS, rdmsr(MSR_IA32_VMX_EXIT_CTLS) | - VM_EXIT_HOST_ADDR_SPACE_SIZE); /* 64-bit host */ - vmwrite(VM_EXIT_MSR_STORE_COUNT, 0); - vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0); - vmwrite(VM_ENTRY_CONTROLS, rdmsr(MSR_IA32_VMX_ENTRY_CTLS) | - VM_ENTRY_IA32E_MODE); /* 64-bit guest */ - vmwrite(VM_ENTRY_MSR_LOAD_COUNT, 0); - vmwrite(VM_ENTRY_INTR_INFO_FIELD, 0); - vmwrite(TPR_THRESHOLD, 0); - - vmwrite(CR0_GUEST_HOST_MASK, 0); - vmwrite(CR4_GUEST_HOST_MASK, 0); - vmwrite(CR0_READ_SHADOW, get_cr0()); - vmwrite(CR4_READ_SHADOW, get_cr4()); - - vmwrite(MSR_BITMAP, vmx->msr_gpa); - vmwrite(VMREAD_BITMAP, vmx->vmread_gpa); - vmwrite(VMWRITE_BITMAP, vmx->vmwrite_gpa); -} - -/* - * Initialize the host state fields based on the current host state, with - * the exception of HOST_RSP and HOST_RIP, which should be set by vmlaunch - * or vmresume. - */ -static inline void init_vmcs_host_state(void) -{ - uint32_t exit_controls = vmreadz(VM_EXIT_CONTROLS); - - vmwrite(HOST_ES_SELECTOR, get_es()); - vmwrite(HOST_CS_SELECTOR, get_cs()); - vmwrite(HOST_SS_SELECTOR, get_ss()); - vmwrite(HOST_DS_SELECTOR, get_ds()); - vmwrite(HOST_FS_SELECTOR, get_fs()); - vmwrite(HOST_GS_SELECTOR, get_gs()); - vmwrite(HOST_TR_SELECTOR, get_tr()); - - if (exit_controls & VM_EXIT_LOAD_IA32_PAT) - vmwrite(HOST_IA32_PAT, rdmsr(MSR_IA32_CR_PAT)); - if (exit_controls & VM_EXIT_LOAD_IA32_EFER) - vmwrite(HOST_IA32_EFER, rdmsr(MSR_EFER)); - if (exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) - vmwrite(HOST_IA32_PERF_GLOBAL_CTRL, - rdmsr(MSR_CORE_PERF_GLOBAL_CTRL)); - - vmwrite(HOST_IA32_SYSENTER_CS, rdmsr(MSR_IA32_SYSENTER_CS)); - - vmwrite(HOST_CR0, get_cr0()); - vmwrite(HOST_CR3, get_cr3()); - vmwrite(HOST_CR4, get_cr4()); - vmwrite(HOST_FS_BASE, rdmsr(MSR_FS_BASE)); - vmwrite(HOST_GS_BASE, rdmsr(MSR_GS_BASE)); - vmwrite(HOST_TR_BASE, - get_desc64_base((struct desc64 *)(get_gdt_base() + get_tr()))); - vmwrite(HOST_GDTR_BASE, get_gdt_base()); - vmwrite(HOST_IDTR_BASE, get_idt_base()); - vmwrite(HOST_IA32_SYSENTER_ESP, rdmsr(MSR_IA32_SYSENTER_ESP)); - vmwrite(HOST_IA32_SYSENTER_EIP, rdmsr(MSR_IA32_SYSENTER_EIP)); -} - -/* - * Initialize the guest state fields essentially as a clone of - * the host state fields. Some host state fields have fixed - * values, and we set the corresponding guest state fields accordingly. - */ -static inline void init_vmcs_guest_state(void *rip, void *rsp) -{ - vmwrite(GUEST_ES_SELECTOR, vmreadz(HOST_ES_SELECTOR)); - vmwrite(GUEST_CS_SELECTOR, vmreadz(HOST_CS_SELECTOR)); - vmwrite(GUEST_SS_SELECTOR, vmreadz(HOST_SS_SELECTOR)); - vmwrite(GUEST_DS_SELECTOR, vmreadz(HOST_DS_SELECTOR)); - vmwrite(GUEST_FS_SELECTOR, vmreadz(HOST_FS_SELECTOR)); - vmwrite(GUEST_GS_SELECTOR, vmreadz(HOST_GS_SELECTOR)); - vmwrite(GUEST_LDTR_SELECTOR, 0); - vmwrite(GUEST_TR_SELECTOR, vmreadz(HOST_TR_SELECTOR)); - vmwrite(GUEST_INTR_STATUS, 0); - vmwrite(GUEST_PML_INDEX, 0); - - vmwrite(VMCS_LINK_POINTER, -1ll); - vmwrite(GUEST_IA32_DEBUGCTL, 0); - vmwrite(GUEST_IA32_PAT, vmreadz(HOST_IA32_PAT)); - vmwrite(GUEST_IA32_EFER, vmreadz(HOST_IA32_EFER)); - vmwrite(GUEST_IA32_PERF_GLOBAL_CTRL, - vmreadz(HOST_IA32_PERF_GLOBAL_CTRL)); - - vmwrite(GUEST_ES_LIMIT, -1); - vmwrite(GUEST_CS_LIMIT, -1); - vmwrite(GUEST_SS_LIMIT, -1); - vmwrite(GUEST_DS_LIMIT, -1); - vmwrite(GUEST_FS_LIMIT, -1); - vmwrite(GUEST_GS_LIMIT, -1); - vmwrite(GUEST_LDTR_LIMIT, -1); - vmwrite(GUEST_TR_LIMIT, 0x67); - vmwrite(GUEST_GDTR_LIMIT, 0xffff); - vmwrite(GUEST_IDTR_LIMIT, 0xffff); - vmwrite(GUEST_ES_AR_BYTES, - vmreadz(GUEST_ES_SELECTOR) == 0 ? 0x10000 : 0xc093); - vmwrite(GUEST_CS_AR_BYTES, 0xa09b); - vmwrite(GUEST_SS_AR_BYTES, 0xc093); - vmwrite(GUEST_DS_AR_BYTES, - vmreadz(GUEST_DS_SELECTOR) == 0 ? 0x10000 : 0xc093); - vmwrite(GUEST_FS_AR_BYTES, - vmreadz(GUEST_FS_SELECTOR) == 0 ? 0x10000 : 0xc093); - vmwrite(GUEST_GS_AR_BYTES, - vmreadz(GUEST_GS_SELECTOR) == 0 ? 0x10000 : 0xc093); - vmwrite(GUEST_LDTR_AR_BYTES, 0x10000); - vmwrite(GUEST_TR_AR_BYTES, 0x8b); - vmwrite(GUEST_INTERRUPTIBILITY_INFO, 0); - vmwrite(GUEST_ACTIVITY_STATE, 0); - vmwrite(GUEST_SYSENTER_CS, vmreadz(HOST_IA32_SYSENTER_CS)); - vmwrite(VMX_PREEMPTION_TIMER_VALUE, 0); - - vmwrite(GUEST_CR0, vmreadz(HOST_CR0)); - vmwrite(GUEST_CR3, vmreadz(HOST_CR3)); - vmwrite(GUEST_CR4, vmreadz(HOST_CR4)); - vmwrite(GUEST_ES_BASE, 0); - vmwrite(GUEST_CS_BASE, 0); - vmwrite(GUEST_SS_BASE, 0); - vmwrite(GUEST_DS_BASE, 0); - vmwrite(GUEST_FS_BASE, vmreadz(HOST_FS_BASE)); - vmwrite(GUEST_GS_BASE, vmreadz(HOST_GS_BASE)); - vmwrite(GUEST_LDTR_BASE, 0); - vmwrite(GUEST_TR_BASE, vmreadz(HOST_TR_BASE)); - vmwrite(GUEST_GDTR_BASE, vmreadz(HOST_GDTR_BASE)); - vmwrite(GUEST_IDTR_BASE, vmreadz(HOST_IDTR_BASE)); - vmwrite(GUEST_DR7, 0x400); - vmwrite(GUEST_RSP, (uint64_t)rsp); - vmwrite(GUEST_RIP, (uint64_t)rip); - vmwrite(GUEST_RFLAGS, 2); - vmwrite(GUEST_PENDING_DBG_EXCEPTIONS, 0); - vmwrite(GUEST_SYSENTER_ESP, vmreadz(HOST_IA32_SYSENTER_ESP)); - vmwrite(GUEST_SYSENTER_EIP, vmreadz(HOST_IA32_SYSENTER_EIP)); -} - -void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp) -{ - init_vmcs_control_fields(vmx); - init_vmcs_host_state(); - init_vmcs_guest_state(guest_rip, guest_rsp); -} diff --git a/tools/testing/selftests/kvm/lib/x86.c b/tools/testing/selftests/kvm/lib/x86.c deleted file mode 100644 index bb6c65ebfa77..000000000000 --- a/tools/testing/selftests/kvm/lib/x86.c +++ /dev/null @@ -1,888 +0,0 @@ -/* - * tools/testing/selftests/kvm/lib/x86.c - * - * Copyright (C) 2018, Google LLC. - * - * This work is licensed under the terms of the GNU GPL, version 2. - */ - -#define _GNU_SOURCE /* for program_invocation_name */ - -#include "test_util.h" -#include "kvm_util.h" -#include "kvm_util_internal.h" -#include "x86.h" - -/* Minimum physical address used for virtual translation tables. */ -#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000 - -/* Virtual translation table structure declarations */ -struct pageMapL4Entry { - uint64_t present:1; - uint64_t writable:1; - uint64_t user:1; - uint64_t write_through:1; - uint64_t cache_disable:1; - uint64_t accessed:1; - uint64_t ignored_06:1; - uint64_t page_size:1; - uint64_t ignored_11_08:4; - uint64_t address:40; - uint64_t ignored_62_52:11; - uint64_t execute_disable:1; -}; - -struct pageDirectoryPointerEntry { - uint64_t present:1; - uint64_t writable:1; - uint64_t user:1; - uint64_t write_through:1; - uint64_t cache_disable:1; - uint64_t accessed:1; - uint64_t ignored_06:1; - uint64_t page_size:1; - uint64_t ignored_11_08:4; - uint64_t address:40; - uint64_t ignored_62_52:11; - uint64_t execute_disable:1; -}; - -struct pageDirectoryEntry { - uint64_t present:1; - uint64_t writable:1; - uint64_t user:1; - uint64_t write_through:1; - uint64_t cache_disable:1; - uint64_t accessed:1; - uint64_t ignored_06:1; - uint64_t page_size:1; - uint64_t ignored_11_08:4; - uint64_t address:40; - uint64_t ignored_62_52:11; - uint64_t execute_disable:1; -}; - -struct pageTableEntry { - uint64_t present:1; - uint64_t writable:1; - uint64_t user:1; - uint64_t write_through:1; - uint64_t cache_disable:1; - uint64_t accessed:1; - uint64_t dirty:1; - uint64_t reserved_07:1; - uint64_t global:1; - uint64_t ignored_11_09:3; - uint64_t address:40; - uint64_t ignored_62_52:11; - uint64_t execute_disable:1; -}; - -/* Register Dump - * - * Input Args: - * indent - Left margin indent amount - * regs - register - * - * Output Args: - * stream - Output FILE stream - * - * Return: None - * - * Dumps the state of the registers given by regs, to the FILE stream - * given by steam. - */ -void regs_dump(FILE *stream, struct kvm_regs *regs, - uint8_t indent) -{ - fprintf(stream, "%*srax: 0x%.16llx rbx: 0x%.16llx " - "rcx: 0x%.16llx rdx: 0x%.16llx\n", - indent, "", - regs->rax, regs->rbx, regs->rcx, regs->rdx); - fprintf(stream, "%*srsi: 0x%.16llx rdi: 0x%.16llx " - "rsp: 0x%.16llx rbp: 0x%.16llx\n", - indent, "", - regs->rsi, regs->rdi, regs->rsp, regs->rbp); - fprintf(stream, "%*sr8: 0x%.16llx r9: 0x%.16llx " - "r10: 0x%.16llx r11: 0x%.16llx\n", - indent, "", - regs->r8, regs->r9, regs->r10, regs->r11); - fprintf(stream, "%*sr12: 0x%.16llx r13: 0x%.16llx " - "r14: 0x%.16llx r15: 0x%.16llx\n", - indent, "", - regs->r12, regs->r13, regs->r14, regs->r15); - fprintf(stream, "%*srip: 0x%.16llx rfl: 0x%.16llx\n", - indent, "", - regs->rip, regs->rflags); -} - -/* Segment Dump - * - * Input Args: - * indent - Left margin indent amount - * segment - KVM segment - * - * Output Args: - * stream - Output FILE stream - * - * Return: None - * - * Dumps the state of the KVM segment given by segment, to the FILE stream - * given by steam. - */ -static void segment_dump(FILE *stream, struct kvm_segment *segment, - uint8_t indent) -{ - fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.8x " - "selector: 0x%.4x type: 0x%.2x\n", - indent, "", segment->base, segment->limit, - segment->selector, segment->type); - fprintf(stream, "%*spresent: 0x%.2x dpl: 0x%.2x " - "db: 0x%.2x s: 0x%.2x l: 0x%.2x\n", - indent, "", segment->present, segment->dpl, - segment->db, segment->s, segment->l); - fprintf(stream, "%*sg: 0x%.2x avl: 0x%.2x " - "unusable: 0x%.2x padding: 0x%.2x\n", - indent, "", segment->g, segment->avl, - segment->unusable, segment->padding); -} - -/* dtable Dump - * - * Input Args: - * indent - Left margin indent amount - * dtable - KVM dtable - * - * Output Args: - * stream - Output FILE stream - * - * Return: None - * - * Dumps the state of the KVM dtable given by dtable, to the FILE stream - * given by steam. - */ -static void dtable_dump(FILE *stream, struct kvm_dtable *dtable, - uint8_t indent) -{ - fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.4x " - "padding: 0x%.4x 0x%.4x 0x%.4x\n", - indent, "", dtable->base, dtable->limit, - dtable->padding[0], dtable->padding[1], dtable->padding[2]); -} - -/* System Register Dump - * - * Input Args: - * indent - Left margin indent amount - * sregs - System registers - * - * Output Args: - * stream - Output FILE stream - * - * Return: None - * - * Dumps the state of the system registers given by sregs, to the FILE stream - * given by steam. - */ -void sregs_dump(FILE *stream, struct kvm_sregs *sregs, - uint8_t indent) -{ - unsigned int i; - - fprintf(stream, "%*scs:\n", indent, ""); - segment_dump(stream, &sregs->cs, indent + 2); - fprintf(stream, "%*sds:\n", indent, ""); - segment_dump(stream, &sregs->ds, indent + 2); - fprintf(stream, "%*ses:\n", indent, ""); - segment_dump(stream, &sregs->es, indent + 2); - fprintf(stream, "%*sfs:\n", indent, ""); - segment_dump(stream, &sregs->fs, indent + 2); - fprintf(stream, "%*sgs:\n", indent, ""); - segment_dump(stream, &sregs->gs, indent + 2); - fprintf(stream, "%*sss:\n", indent, ""); - segment_dump(stream, &sregs->ss, indent + 2); - fprintf(stream, "%*str:\n", indent, ""); - segment_dump(stream, &sregs->tr, indent + 2); - fprintf(stream, "%*sldt:\n", indent, ""); - segment_dump(stream, &sregs->ldt, indent + 2); - - fprintf(stream, "%*sgdt:\n", indent, ""); - dtable_dump(stream, &sregs->gdt, indent + 2); - fprintf(stream, "%*sidt:\n", indent, ""); - dtable_dump(stream, &sregs->idt, indent + 2); - - fprintf(stream, "%*scr0: 0x%.16llx cr2: 0x%.16llx " - "cr3: 0x%.16llx cr4: 0x%.16llx\n", - indent, "", - sregs->cr0, sregs->cr2, sregs->cr3, sregs->cr4); - fprintf(stream, "%*scr8: 0x%.16llx efer: 0x%.16llx " - "apic_base: 0x%.16llx\n", - indent, "", - sregs->cr8, sregs->efer, sregs->apic_base); - - fprintf(stream, "%*sinterrupt_bitmap:\n", indent, ""); - for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++) { - fprintf(stream, "%*s%.16llx\n", indent + 2, "", - sregs->interrupt_bitmap[i]); - } -} - -void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot) -{ - int rc; - - TEST_ASSERT(vm->mode == VM_MODE_FLAT48PG, "Attempt to use " - "unknown or unsupported guest mode, mode: 0x%x", vm->mode); - - /* If needed, create page map l4 table. */ - if (!vm->pgd_created) { - vm_paddr_t paddr = vm_phy_page_alloc(vm, - KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot); - vm->pgd = paddr; - vm->pgd_created = true; - } -} - -/* VM Virtual Page Map - * - * Input Args: - * vm - Virtual Machine - * vaddr - VM Virtual Address - * paddr - VM Physical Address - * pgd_memslot - Memory region slot for new virtual translation tables - * - * Output Args: None - * - * Return: None - * - * Within the VM given by vm, creates a virtual translation for the page - * starting at vaddr to the page starting at paddr. - */ -void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, - uint32_t pgd_memslot) -{ - uint16_t index[4]; - struct pageMapL4Entry *pml4e; - - TEST_ASSERT(vm->mode == VM_MODE_FLAT48PG, "Attempt to use " - "unknown or unsupported guest mode, mode: 0x%x", vm->mode); - - TEST_ASSERT((vaddr % vm->page_size) == 0, - "Virtual address not on page boundary,\n" - " vaddr: 0x%lx vm->page_size: 0x%x", - vaddr, vm->page_size); - TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, - (vaddr >> vm->page_shift)), - "Invalid virtual address, vaddr: 0x%lx", - vaddr); - TEST_ASSERT((paddr % vm->page_size) == 0, - "Physical address not on page boundary,\n" - " paddr: 0x%lx vm->page_size: 0x%x", - paddr, vm->page_size); - TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn, - "Physical address beyond beyond maximum supported,\n" - " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", - paddr, vm->max_gfn, vm->page_size); - - index[0] = (vaddr >> 12) & 0x1ffu; - index[1] = (vaddr >> 21) & 0x1ffu; - index[2] = (vaddr >> 30) & 0x1ffu; - index[3] = (vaddr >> 39) & 0x1ffu; - - /* Allocate page directory pointer table if not present. */ - pml4e = addr_gpa2hva(vm, vm->pgd); - if (!pml4e[index[3]].present) { - pml4e[index[3]].address = vm_phy_page_alloc(vm, - KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot) - >> vm->page_shift; - pml4e[index[3]].writable = true; - pml4e[index[3]].present = true; - } - - /* Allocate page directory table if not present. */ - struct pageDirectoryPointerEntry *pdpe; - pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size); - if (!pdpe[index[2]].present) { - pdpe[index[2]].address = vm_phy_page_alloc(vm, - KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot) - >> vm->page_shift; - pdpe[index[2]].writable = true; - pdpe[index[2]].present = true; - } - - /* Allocate page table if not present. */ - struct pageDirectoryEntry *pde; - pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size); - if (!pde[index[1]].present) { - pde[index[1]].address = vm_phy_page_alloc(vm, - KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot) - >> vm->page_shift; - pde[index[1]].writable = true; - pde[index[1]].present = true; - } - - /* Fill in page table entry. */ - struct pageTableEntry *pte; - pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size); - pte[index[0]].address = paddr >> vm->page_shift; - pte[index[0]].writable = true; - pte[index[0]].present = 1; -} - -/* Virtual Translation Tables Dump - * - * Input Args: - * vm - Virtual Machine - * indent - Left margin indent amount - * - * Output Args: - * stream - Output FILE stream - * - * Return: None - * - * Dumps to the FILE stream given by stream, the contents of all the - * virtual translation tables for the VM given by vm. - */ -void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) -{ - struct pageMapL4Entry *pml4e, *pml4e_start; - struct pageDirectoryPointerEntry *pdpe, *pdpe_start; - struct pageDirectoryEntry *pde, *pde_start; - struct pageTableEntry *pte, *pte_start; - - if (!vm->pgd_created) - return; - - fprintf(stream, "%*s " - " no\n", indent, ""); - fprintf(stream, "%*s index hvaddr gpaddr " - "addr w exec dirty\n", - indent, ""); - pml4e_start = (struct pageMapL4Entry *) addr_gpa2hva(vm, - vm->pgd); - for (uint16_t n1 = 0; n1 <= 0x1ffu; n1++) { - pml4e = &pml4e_start[n1]; - if (!pml4e->present) - continue; - fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10lx %u " - " %u\n", - indent, "", - pml4e - pml4e_start, pml4e, - addr_hva2gpa(vm, pml4e), (uint64_t) pml4e->address, - pml4e->writable, pml4e->execute_disable); - - pdpe_start = addr_gpa2hva(vm, pml4e->address - * vm->page_size); - for (uint16_t n2 = 0; n2 <= 0x1ffu; n2++) { - pdpe = &pdpe_start[n2]; - if (!pdpe->present) - continue; - fprintf(stream, "%*spdpe 0x%-3zx %p 0x%-12lx 0x%-10lx " - "%u %u\n", - indent, "", - pdpe - pdpe_start, pdpe, - addr_hva2gpa(vm, pdpe), - (uint64_t) pdpe->address, pdpe->writable, - pdpe->execute_disable); - - pde_start = addr_gpa2hva(vm, - pdpe->address * vm->page_size); - for (uint16_t n3 = 0; n3 <= 0x1ffu; n3++) { - pde = &pde_start[n3]; - if (!pde->present) - continue; - fprintf(stream, "%*spde 0x%-3zx %p " - "0x%-12lx 0x%-10lx %u %u\n", - indent, "", pde - pde_start, pde, - addr_hva2gpa(vm, pde), - (uint64_t) pde->address, pde->writable, - pde->execute_disable); - - pte_start = addr_gpa2hva(vm, - pde->address * vm->page_size); - for (uint16_t n4 = 0; n4 <= 0x1ffu; n4++) { - pte = &pte_start[n4]; - if (!pte->present) - continue; - fprintf(stream, "%*spte 0x%-3zx %p " - "0x%-12lx 0x%-10lx %u %u " - " %u 0x%-10lx\n", - indent, "", - pte - pte_start, pte, - addr_hva2gpa(vm, pte), - (uint64_t) pte->address, - pte->writable, - pte->execute_disable, - pte->dirty, - ((uint64_t) n1 << 27) - | ((uint64_t) n2 << 18) - | ((uint64_t) n3 << 9) - | ((uint64_t) n4)); - } - } - } - } -} - -/* Set Unusable Segment - * - * Input Args: None - * - * Output Args: - * segp - Pointer to segment register - * - * Return: None - * - * Sets the segment register pointed to by segp to an unusable state. - */ -static void kvm_seg_set_unusable(struct kvm_segment *segp) -{ - memset(segp, 0, sizeof(*segp)); - segp->unusable = true; -} - -static void kvm_seg_fill_gdt_64bit(struct kvm_vm *vm, struct kvm_segment *segp) -{ - void *gdt = addr_gva2hva(vm, vm->gdt); - struct desc64 *desc = gdt + (segp->selector >> 3) * 8; - - desc->limit0 = segp->limit & 0xFFFF; - desc->base0 = segp->base & 0xFFFF; - desc->base1 = segp->base >> 16; - desc->s = segp->s; - desc->type = segp->type; - desc->dpl = segp->dpl; - desc->p = segp->present; - desc->limit1 = segp->limit >> 16; - desc->l = segp->l; - desc->db = segp->db; - desc->g = segp->g; - desc->base2 = segp->base >> 24; - if (!segp->s) - desc->base3 = segp->base >> 32; -} - - -/* Set Long Mode Flat Kernel Code Segment - * - * Input Args: - * vm - VM whose GDT is being filled, or NULL to only write segp - * selector - selector value - * - * Output Args: - * segp - Pointer to KVM segment - * - * Return: None - * - * Sets up the KVM segment pointed to by segp, to be a code segment - * with the selector value given by selector. - */ -static void kvm_seg_set_kernel_code_64bit(struct kvm_vm *vm, uint16_t selector, - struct kvm_segment *segp) -{ - memset(segp, 0, sizeof(*segp)); - segp->selector = selector; - segp->limit = 0xFFFFFFFFu; - segp->s = 0x1; /* kTypeCodeData */ - segp->type = 0x08 | 0x01 | 0x02; /* kFlagCode | kFlagCodeAccessed - * | kFlagCodeReadable - */ - segp->g = true; - segp->l = true; - segp->present = 1; - if (vm) - kvm_seg_fill_gdt_64bit(vm, segp); -} - -/* Set Long Mode Flat Kernel Data Segment - * - * Input Args: - * vm - VM whose GDT is being filled, or NULL to only write segp - * selector - selector value - * - * Output Args: - * segp - Pointer to KVM segment - * - * Return: None - * - * Sets up the KVM segment pointed to by segp, to be a data segment - * with the selector value given by selector. - */ -static void kvm_seg_set_kernel_data_64bit(struct kvm_vm *vm, uint16_t selector, - struct kvm_segment *segp) -{ - memset(segp, 0, sizeof(*segp)); - segp->selector = selector; - segp->limit = 0xFFFFFFFFu; - segp->s = 0x1; /* kTypeCodeData */ - segp->type = 0x00 | 0x01 | 0x02; /* kFlagData | kFlagDataAccessed - * | kFlagDataWritable - */ - segp->g = true; - segp->present = true; - if (vm) - kvm_seg_fill_gdt_64bit(vm, segp); -} - -/* Address Guest Virtual to Guest Physical - * - * Input Args: - * vm - Virtual Machine - * gpa - VM virtual address - * - * Output Args: None - * - * Return: - * Equivalent VM physical address - * - * Translates the VM virtual address given by gva to a VM physical - * address and then locates the memory region containing the VM - * physical address, within the VM given by vm. When found, the host - * virtual address providing the memory to the vm physical address is returned. - * A TEST_ASSERT failure occurs if no region containing translated - * VM virtual address exists. - */ -vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) -{ - uint16_t index[4]; - struct pageMapL4Entry *pml4e; - struct pageDirectoryPointerEntry *pdpe; - struct pageDirectoryEntry *pde; - struct pageTableEntry *pte; - void *hva; - - TEST_ASSERT(vm->mode == VM_MODE_FLAT48PG, "Attempt to use " - "unknown or unsupported guest mode, mode: 0x%x", vm->mode); - - index[0] = (gva >> 12) & 0x1ffu; - index[1] = (gva >> 21) & 0x1ffu; - index[2] = (gva >> 30) & 0x1ffu; - index[3] = (gva >> 39) & 0x1ffu; - - if (!vm->pgd_created) - goto unmapped_gva; - pml4e = addr_gpa2hva(vm, vm->pgd); - if (!pml4e[index[3]].present) - goto unmapped_gva; - - pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size); - if (!pdpe[index[2]].present) - goto unmapped_gva; - - pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size); - if (!pde[index[1]].present) - goto unmapped_gva; - - pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size); - if (!pte[index[0]].present) - goto unmapped_gva; - - return (pte[index[0]].address * vm->page_size) + (gva & 0xfffu); - -unmapped_gva: - TEST_ASSERT(false, "No mapping for vm virtual address, " - "gva: 0x%lx", gva); -} - -static void kvm_setup_gdt(struct kvm_vm *vm, struct kvm_dtable *dt, int gdt_memslot, - int pgd_memslot) -{ - if (!vm->gdt) - vm->gdt = vm_vaddr_alloc(vm, getpagesize(), - KVM_UTIL_MIN_VADDR, gdt_memslot, pgd_memslot); - - dt->base = vm->gdt; - dt->limit = getpagesize(); -} - -static void kvm_setup_tss_64bit(struct kvm_vm *vm, struct kvm_segment *segp, - int selector, int gdt_memslot, - int pgd_memslot) -{ - if (!vm->tss) - vm->tss = vm_vaddr_alloc(vm, getpagesize(), - KVM_UTIL_MIN_VADDR, gdt_memslot, pgd_memslot); - - memset(segp, 0, sizeof(*segp)); - segp->base = vm->tss; - segp->limit = 0x67; - segp->selector = selector; - segp->type = 0xb; - segp->present = 1; - kvm_seg_fill_gdt_64bit(vm, segp); -} - -void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_memslot) -{ - struct kvm_sregs sregs; - - /* Set mode specific system register values. */ - vcpu_sregs_get(vm, vcpuid, &sregs); - - sregs.idt.limit = 0; - - kvm_setup_gdt(vm, &sregs.gdt, gdt_memslot, pgd_memslot); - - switch (vm->mode) { - case VM_MODE_FLAT48PG: - sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG; - sregs.cr4 |= X86_CR4_PAE | X86_CR4_OSFXSR; - sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX); - - kvm_seg_set_unusable(&sregs.ldt); - kvm_seg_set_kernel_code_64bit(vm, 0x8, &sregs.cs); - kvm_seg_set_kernel_data_64bit(vm, 0x10, &sregs.ds); - kvm_seg_set_kernel_data_64bit(vm, 0x10, &sregs.es); - kvm_setup_tss_64bit(vm, &sregs.tr, 0x18, gdt_memslot, pgd_memslot); - break; - - default: - TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", vm->mode); - } - - sregs.cr3 = vm->pgd; - vcpu_sregs_set(vm, vcpuid, &sregs); -} -/* Adds a vCPU with reasonable defaults (i.e., a stack) - * - * Input Args: - * vcpuid - The id of the VCPU to add to the VM. - * guest_code - The vCPU's entry point - */ -void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code) -{ - struct kvm_mp_state mp_state; - struct kvm_regs regs; - vm_vaddr_t stack_vaddr; - stack_vaddr = vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(), - DEFAULT_GUEST_STACK_VADDR_MIN, 0, 0); - - /* Create VCPU */ - vm_vcpu_add(vm, vcpuid, 0, 0); - - /* Setup guest general purpose registers */ - vcpu_regs_get(vm, vcpuid, ®s); - regs.rflags = regs.rflags | 0x2; - regs.rsp = stack_vaddr + (DEFAULT_STACK_PGS * getpagesize()); - regs.rip = (unsigned long) guest_code; - vcpu_regs_set(vm, vcpuid, ®s); - - /* Setup the MP state */ - mp_state.mp_state = 0; - vcpu_set_mp_state(vm, vcpuid, &mp_state); -} - -/* VM VCPU CPUID Set - * - * Input Args: - * vm - Virtual Machine - * vcpuid - VCPU id - * cpuid - The CPUID values to set. - * - * Output Args: None - * - * Return: void - * - * Set the VCPU's CPUID. - */ -void vcpu_set_cpuid(struct kvm_vm *vm, - uint32_t vcpuid, struct kvm_cpuid2 *cpuid) -{ - struct vcpu *vcpu = vcpu_find(vm, vcpuid); - int rc; - - TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); - - rc = ioctl(vcpu->fd, KVM_SET_CPUID2, cpuid); - TEST_ASSERT(rc == 0, "KVM_SET_CPUID2 failed, rc: %i errno: %i", - rc, errno); - -} -/* Create a VM with reasonable defaults - * - * Input Args: - * vcpuid - The id of the single VCPU to add to the VM. - * extra_mem_pages - The size of extra memories to add (this will - * decide how much extra space we will need to - * setup the page tables using mem slot 0) - * guest_code - The vCPU's entry point - * - * Output Args: None - * - * Return: - * Pointer to opaque structure that describes the created VM. - */ -struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages, - void *guest_code) -{ - struct kvm_vm *vm; - /* - * For x86 the maximum page table size for a memory region - * will be when only 4K pages are used. In that case the - * total extra size for page tables (for extra N pages) will - * be: N/512+N/512^2+N/512^3+... which is definitely smaller - * than N/512*2. - */ - uint64_t extra_pg_pages = extra_mem_pages / 512 * 2; - - /* Create VM */ - vm = vm_create(VM_MODE_FLAT48PG, - DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, - O_RDWR); - - /* Setup guest code */ - kvm_vm_elf_load(vm, program_invocation_name, 0, 0); - - /* Setup IRQ Chip */ - vm_create_irqchip(vm); - - /* Add the first vCPU. */ - vm_vcpu_add_default(vm, vcpuid, guest_code); - - return vm; -} - -struct kvm_x86_state { - struct kvm_vcpu_events events; - struct kvm_mp_state mp_state; - struct kvm_regs regs; - struct kvm_xsave xsave; - struct kvm_xcrs xcrs; - struct kvm_sregs sregs; - struct kvm_debugregs debugregs; - union { - struct kvm_nested_state nested; - char nested_[16384]; - }; - struct kvm_msrs msrs; -}; - -static int kvm_get_num_msrs(struct kvm_vm *vm) -{ - struct kvm_msr_list nmsrs; - int r; - - nmsrs.nmsrs = 0; - r = ioctl(vm->kvm_fd, KVM_GET_MSR_INDEX_LIST, &nmsrs); - TEST_ASSERT(r == -1 && errno == E2BIG, "Unexpected result from KVM_GET_MSR_INDEX_LIST probe, r: %i", - r); - - return nmsrs.nmsrs; -} - -struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid) -{ - struct vcpu *vcpu = vcpu_find(vm, vcpuid); - struct kvm_msr_list *list; - struct kvm_x86_state *state; - int nmsrs, r, i; - static int nested_size = -1; - - if (nested_size == -1) { - nested_size = kvm_check_cap(KVM_CAP_NESTED_STATE); - TEST_ASSERT(nested_size <= sizeof(state->nested_), - "Nested state size too big, %i > %zi", - nested_size, sizeof(state->nested_)); - } - - nmsrs = kvm_get_num_msrs(vm); - list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0])); - list->nmsrs = nmsrs; - r = ioctl(vm->kvm_fd, KVM_GET_MSR_INDEX_LIST, list); - TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MSR_INDEX_LIST, r: %i", - r); - - state = malloc(sizeof(*state) + nmsrs * sizeof(state->msrs.entries[0])); - r = ioctl(vcpu->fd, KVM_GET_VCPU_EVENTS, &state->events); - TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_VCPU_EVENTS, r: %i", - r); - - r = ioctl(vcpu->fd, KVM_GET_MP_STATE, &state->mp_state); - TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MP_STATE, r: %i", - r); - - r = ioctl(vcpu->fd, KVM_GET_REGS, &state->regs); - TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_REGS, r: %i", - r); - - r = ioctl(vcpu->fd, KVM_GET_XSAVE, &state->xsave); - TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_XSAVE, r: %i", - r); - - r = ioctl(vcpu->fd, KVM_GET_XCRS, &state->xcrs); - TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_XCRS, r: %i", - r); - - r = ioctl(vcpu->fd, KVM_GET_SREGS, &state->sregs); - TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_SREGS, r: %i", - r); - - if (nested_size) { - state->nested.size = sizeof(state->nested_); - r = ioctl(vcpu->fd, KVM_GET_NESTED_STATE, &state->nested); - TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_NESTED_STATE, r: %i", - r); - TEST_ASSERT(state->nested.size <= nested_size, - "Nested state size too big, %i (KVM_CHECK_CAP gave %i)", - state->nested.size, nested_size); - } else - state->nested.size = 0; - - state->msrs.nmsrs = nmsrs; - for (i = 0; i < nmsrs; i++) - state->msrs.entries[i].index = list->indices[i]; - r = ioctl(vcpu->fd, KVM_GET_MSRS, &state->msrs); - TEST_ASSERT(r == nmsrs, "Unexpected result from KVM_GET_MSRS, r: %i (failed at %x)", - r, r == nmsrs ? -1 : list->indices[r]); - - r = ioctl(vcpu->fd, KVM_GET_DEBUGREGS, &state->debugregs); - TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_DEBUGREGS, r: %i", - r); - - free(list); - return state; -} - -void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *state) -{ - struct vcpu *vcpu = vcpu_find(vm, vcpuid); - int r; - - if (state->nested.size) { - r = ioctl(vcpu->fd, KVM_SET_NESTED_STATE, &state->nested); - TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_NESTED_STATE, r: %i", - r); - } - - r = ioctl(vcpu->fd, KVM_SET_XSAVE, &state->xsave); - TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XSAVE, r: %i", - r); - - r = ioctl(vcpu->fd, KVM_SET_XCRS, &state->xcrs); - TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XCRS, r: %i", - r); - - r = ioctl(vcpu->fd, KVM_SET_SREGS, &state->sregs); - TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_SREGS, r: %i", - r); - - r = ioctl(vcpu->fd, KVM_SET_MSRS, &state->msrs); - TEST_ASSERT(r == state->msrs.nmsrs, "Unexpected result from KVM_SET_MSRS, r: %i (failed at %x)", - r, r == state->msrs.nmsrs ? -1 : state->msrs.entries[r].index); - - r = ioctl(vcpu->fd, KVM_SET_VCPU_EVENTS, &state->events); - TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_VCPU_EVENTS, r: %i", - r); - - r = ioctl(vcpu->fd, KVM_SET_MP_STATE, &state->mp_state); - TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_MP_STATE, r: %i", - r); - - r = ioctl(vcpu->fd, KVM_SET_DEBUGREGS, &state->debugregs); - TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_DEBUGREGS, r: %i", - r); - - r = ioctl(vcpu->fd, KVM_SET_REGS, &state->regs); - TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_REGS, r: %i", - r); -} diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c new file mode 100644 index 000000000000..79c2c5c203c0 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -0,0 +1,888 @@ +/* + * tools/testing/selftests/kvm/lib/x86_64/processor.c + * + * Copyright (C) 2018, Google LLC. + * + * This work is licensed under the terms of the GNU GPL, version 2. + */ + +#define _GNU_SOURCE /* for program_invocation_name */ + +#include "test_util.h" +#include "kvm_util.h" +#include "../kvm_util_internal.h" +#include "processor.h" + +/* Minimum physical address used for virtual translation tables. */ +#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000 + +/* Virtual translation table structure declarations */ +struct pageMapL4Entry { + uint64_t present:1; + uint64_t writable:1; + uint64_t user:1; + uint64_t write_through:1; + uint64_t cache_disable:1; + uint64_t accessed:1; + uint64_t ignored_06:1; + uint64_t page_size:1; + uint64_t ignored_11_08:4; + uint64_t address:40; + uint64_t ignored_62_52:11; + uint64_t execute_disable:1; +}; + +struct pageDirectoryPointerEntry { + uint64_t present:1; + uint64_t writable:1; + uint64_t user:1; + uint64_t write_through:1; + uint64_t cache_disable:1; + uint64_t accessed:1; + uint64_t ignored_06:1; + uint64_t page_size:1; + uint64_t ignored_11_08:4; + uint64_t address:40; + uint64_t ignored_62_52:11; + uint64_t execute_disable:1; +}; + +struct pageDirectoryEntry { + uint64_t present:1; + uint64_t writable:1; + uint64_t user:1; + uint64_t write_through:1; + uint64_t cache_disable:1; + uint64_t accessed:1; + uint64_t ignored_06:1; + uint64_t page_size:1; + uint64_t ignored_11_08:4; + uint64_t address:40; + uint64_t ignored_62_52:11; + uint64_t execute_disable:1; +}; + +struct pageTableEntry { + uint64_t present:1; + uint64_t writable:1; + uint64_t user:1; + uint64_t write_through:1; + uint64_t cache_disable:1; + uint64_t accessed:1; + uint64_t dirty:1; + uint64_t reserved_07:1; + uint64_t global:1; + uint64_t ignored_11_09:3; + uint64_t address:40; + uint64_t ignored_62_52:11; + uint64_t execute_disable:1; +}; + +/* Register Dump + * + * Input Args: + * indent - Left margin indent amount + * regs - register + * + * Output Args: + * stream - Output FILE stream + * + * Return: None + * + * Dumps the state of the registers given by regs, to the FILE stream + * given by steam. + */ +void regs_dump(FILE *stream, struct kvm_regs *regs, + uint8_t indent) +{ + fprintf(stream, "%*srax: 0x%.16llx rbx: 0x%.16llx " + "rcx: 0x%.16llx rdx: 0x%.16llx\n", + indent, "", + regs->rax, regs->rbx, regs->rcx, regs->rdx); + fprintf(stream, "%*srsi: 0x%.16llx rdi: 0x%.16llx " + "rsp: 0x%.16llx rbp: 0x%.16llx\n", + indent, "", + regs->rsi, regs->rdi, regs->rsp, regs->rbp); + fprintf(stream, "%*sr8: 0x%.16llx r9: 0x%.16llx " + "r10: 0x%.16llx r11: 0x%.16llx\n", + indent, "", + regs->r8, regs->r9, regs->r10, regs->r11); + fprintf(stream, "%*sr12: 0x%.16llx r13: 0x%.16llx " + "r14: 0x%.16llx r15: 0x%.16llx\n", + indent, "", + regs->r12, regs->r13, regs->r14, regs->r15); + fprintf(stream, "%*srip: 0x%.16llx rfl: 0x%.16llx\n", + indent, "", + regs->rip, regs->rflags); +} + +/* Segment Dump + * + * Input Args: + * indent - Left margin indent amount + * segment - KVM segment + * + * Output Args: + * stream - Output FILE stream + * + * Return: None + * + * Dumps the state of the KVM segment given by segment, to the FILE stream + * given by steam. + */ +static void segment_dump(FILE *stream, struct kvm_segment *segment, + uint8_t indent) +{ + fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.8x " + "selector: 0x%.4x type: 0x%.2x\n", + indent, "", segment->base, segment->limit, + segment->selector, segment->type); + fprintf(stream, "%*spresent: 0x%.2x dpl: 0x%.2x " + "db: 0x%.2x s: 0x%.2x l: 0x%.2x\n", + indent, "", segment->present, segment->dpl, + segment->db, segment->s, segment->l); + fprintf(stream, "%*sg: 0x%.2x avl: 0x%.2x " + "unusable: 0x%.2x padding: 0x%.2x\n", + indent, "", segment->g, segment->avl, + segment->unusable, segment->padding); +} + +/* dtable Dump + * + * Input Args: + * indent - Left margin indent amount + * dtable - KVM dtable + * + * Output Args: + * stream - Output FILE stream + * + * Return: None + * + * Dumps the state of the KVM dtable given by dtable, to the FILE stream + * given by steam. + */ +static void dtable_dump(FILE *stream, struct kvm_dtable *dtable, + uint8_t indent) +{ + fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.4x " + "padding: 0x%.4x 0x%.4x 0x%.4x\n", + indent, "", dtable->base, dtable->limit, + dtable->padding[0], dtable->padding[1], dtable->padding[2]); +} + +/* System Register Dump + * + * Input Args: + * indent - Left margin indent amount + * sregs - System registers + * + * Output Args: + * stream - Output FILE stream + * + * Return: None + * + * Dumps the state of the system registers given by sregs, to the FILE stream + * given by steam. + */ +void sregs_dump(FILE *stream, struct kvm_sregs *sregs, + uint8_t indent) +{ + unsigned int i; + + fprintf(stream, "%*scs:\n", indent, ""); + segment_dump(stream, &sregs->cs, indent + 2); + fprintf(stream, "%*sds:\n", indent, ""); + segment_dump(stream, &sregs->ds, indent + 2); + fprintf(stream, "%*ses:\n", indent, ""); + segment_dump(stream, &sregs->es, indent + 2); + fprintf(stream, "%*sfs:\n", indent, ""); + segment_dump(stream, &sregs->fs, indent + 2); + fprintf(stream, "%*sgs:\n", indent, ""); + segment_dump(stream, &sregs->gs, indent + 2); + fprintf(stream, "%*sss:\n", indent, ""); + segment_dump(stream, &sregs->ss, indent + 2); + fprintf(stream, "%*str:\n", indent, ""); + segment_dump(stream, &sregs->tr, indent + 2); + fprintf(stream, "%*sldt:\n", indent, ""); + segment_dump(stream, &sregs->ldt, indent + 2); + + fprintf(stream, "%*sgdt:\n", indent, ""); + dtable_dump(stream, &sregs->gdt, indent + 2); + fprintf(stream, "%*sidt:\n", indent, ""); + dtable_dump(stream, &sregs->idt, indent + 2); + + fprintf(stream, "%*scr0: 0x%.16llx cr2: 0x%.16llx " + "cr3: 0x%.16llx cr4: 0x%.16llx\n", + indent, "", + sregs->cr0, sregs->cr2, sregs->cr3, sregs->cr4); + fprintf(stream, "%*scr8: 0x%.16llx efer: 0x%.16llx " + "apic_base: 0x%.16llx\n", + indent, "", + sregs->cr8, sregs->efer, sregs->apic_base); + + fprintf(stream, "%*sinterrupt_bitmap:\n", indent, ""); + for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++) { + fprintf(stream, "%*s%.16llx\n", indent + 2, "", + sregs->interrupt_bitmap[i]); + } +} + +void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot) +{ + int rc; + + TEST_ASSERT(vm->mode == VM_MODE_FLAT48PG, "Attempt to use " + "unknown or unsupported guest mode, mode: 0x%x", vm->mode); + + /* If needed, create page map l4 table. */ + if (!vm->pgd_created) { + vm_paddr_t paddr = vm_phy_page_alloc(vm, + KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot); + vm->pgd = paddr; + vm->pgd_created = true; + } +} + +/* VM Virtual Page Map + * + * Input Args: + * vm - Virtual Machine + * vaddr - VM Virtual Address + * paddr - VM Physical Address + * pgd_memslot - Memory region slot for new virtual translation tables + * + * Output Args: None + * + * Return: None + * + * Within the VM given by vm, creates a virtual translation for the page + * starting at vaddr to the page starting at paddr. + */ +void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, + uint32_t pgd_memslot) +{ + uint16_t index[4]; + struct pageMapL4Entry *pml4e; + + TEST_ASSERT(vm->mode == VM_MODE_FLAT48PG, "Attempt to use " + "unknown or unsupported guest mode, mode: 0x%x", vm->mode); + + TEST_ASSERT((vaddr % vm->page_size) == 0, + "Virtual address not on page boundary,\n" + " vaddr: 0x%lx vm->page_size: 0x%x", + vaddr, vm->page_size); + TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, + (vaddr >> vm->page_shift)), + "Invalid virtual address, vaddr: 0x%lx", + vaddr); + TEST_ASSERT((paddr % vm->page_size) == 0, + "Physical address not on page boundary,\n" + " paddr: 0x%lx vm->page_size: 0x%x", + paddr, vm->page_size); + TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn, + "Physical address beyond beyond maximum supported,\n" + " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", + paddr, vm->max_gfn, vm->page_size); + + index[0] = (vaddr >> 12) & 0x1ffu; + index[1] = (vaddr >> 21) & 0x1ffu; + index[2] = (vaddr >> 30) & 0x1ffu; + index[3] = (vaddr >> 39) & 0x1ffu; + + /* Allocate page directory pointer table if not present. */ + pml4e = addr_gpa2hva(vm, vm->pgd); + if (!pml4e[index[3]].present) { + pml4e[index[3]].address = vm_phy_page_alloc(vm, + KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot) + >> vm->page_shift; + pml4e[index[3]].writable = true; + pml4e[index[3]].present = true; + } + + /* Allocate page directory table if not present. */ + struct pageDirectoryPointerEntry *pdpe; + pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size); + if (!pdpe[index[2]].present) { + pdpe[index[2]].address = vm_phy_page_alloc(vm, + KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot) + >> vm->page_shift; + pdpe[index[2]].writable = true; + pdpe[index[2]].present = true; + } + + /* Allocate page table if not present. */ + struct pageDirectoryEntry *pde; + pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size); + if (!pde[index[1]].present) { + pde[index[1]].address = vm_phy_page_alloc(vm, + KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot) + >> vm->page_shift; + pde[index[1]].writable = true; + pde[index[1]].present = true; + } + + /* Fill in page table entry. */ + struct pageTableEntry *pte; + pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size); + pte[index[0]].address = paddr >> vm->page_shift; + pte[index[0]].writable = true; + pte[index[0]].present = 1; +} + +/* Virtual Translation Tables Dump + * + * Input Args: + * vm - Virtual Machine + * indent - Left margin indent amount + * + * Output Args: + * stream - Output FILE stream + * + * Return: None + * + * Dumps to the FILE stream given by stream, the contents of all the + * virtual translation tables for the VM given by vm. + */ +void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) +{ + struct pageMapL4Entry *pml4e, *pml4e_start; + struct pageDirectoryPointerEntry *pdpe, *pdpe_start; + struct pageDirectoryEntry *pde, *pde_start; + struct pageTableEntry *pte, *pte_start; + + if (!vm->pgd_created) + return; + + fprintf(stream, "%*s " + " no\n", indent, ""); + fprintf(stream, "%*s index hvaddr gpaddr " + "addr w exec dirty\n", + indent, ""); + pml4e_start = (struct pageMapL4Entry *) addr_gpa2hva(vm, + vm->pgd); + for (uint16_t n1 = 0; n1 <= 0x1ffu; n1++) { + pml4e = &pml4e_start[n1]; + if (!pml4e->present) + continue; + fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10lx %u " + " %u\n", + indent, "", + pml4e - pml4e_start, pml4e, + addr_hva2gpa(vm, pml4e), (uint64_t) pml4e->address, + pml4e->writable, pml4e->execute_disable); + + pdpe_start = addr_gpa2hva(vm, pml4e->address + * vm->page_size); + for (uint16_t n2 = 0; n2 <= 0x1ffu; n2++) { + pdpe = &pdpe_start[n2]; + if (!pdpe->present) + continue; + fprintf(stream, "%*spdpe 0x%-3zx %p 0x%-12lx 0x%-10lx " + "%u %u\n", + indent, "", + pdpe - pdpe_start, pdpe, + addr_hva2gpa(vm, pdpe), + (uint64_t) pdpe->address, pdpe->writable, + pdpe->execute_disable); + + pde_start = addr_gpa2hva(vm, + pdpe->address * vm->page_size); + for (uint16_t n3 = 0; n3 <= 0x1ffu; n3++) { + pde = &pde_start[n3]; + if (!pde->present) + continue; + fprintf(stream, "%*spde 0x%-3zx %p " + "0x%-12lx 0x%-10lx %u %u\n", + indent, "", pde - pde_start, pde, + addr_hva2gpa(vm, pde), + (uint64_t) pde->address, pde->writable, + pde->execute_disable); + + pte_start = addr_gpa2hva(vm, + pde->address * vm->page_size); + for (uint16_t n4 = 0; n4 <= 0x1ffu; n4++) { + pte = &pte_start[n4]; + if (!pte->present) + continue; + fprintf(stream, "%*spte 0x%-3zx %p " + "0x%-12lx 0x%-10lx %u %u " + " %u 0x%-10lx\n", + indent, "", + pte - pte_start, pte, + addr_hva2gpa(vm, pte), + (uint64_t) pte->address, + pte->writable, + pte->execute_disable, + pte->dirty, + ((uint64_t) n1 << 27) + | ((uint64_t) n2 << 18) + | ((uint64_t) n3 << 9) + | ((uint64_t) n4)); + } + } + } + } +} + +/* Set Unusable Segment + * + * Input Args: None + * + * Output Args: + * segp - Pointer to segment register + * + * Return: None + * + * Sets the segment register pointed to by segp to an unusable state. + */ +static void kvm_seg_set_unusable(struct kvm_segment *segp) +{ + memset(segp, 0, sizeof(*segp)); + segp->unusable = true; +} + +static void kvm_seg_fill_gdt_64bit(struct kvm_vm *vm, struct kvm_segment *segp) +{ + void *gdt = addr_gva2hva(vm, vm->gdt); + struct desc64 *desc = gdt + (segp->selector >> 3) * 8; + + desc->limit0 = segp->limit & 0xFFFF; + desc->base0 = segp->base & 0xFFFF; + desc->base1 = segp->base >> 16; + desc->s = segp->s; + desc->type = segp->type; + desc->dpl = segp->dpl; + desc->p = segp->present; + desc->limit1 = segp->limit >> 16; + desc->l = segp->l; + desc->db = segp->db; + desc->g = segp->g; + desc->base2 = segp->base >> 24; + if (!segp->s) + desc->base3 = segp->base >> 32; +} + + +/* Set Long Mode Flat Kernel Code Segment + * + * Input Args: + * vm - VM whose GDT is being filled, or NULL to only write segp + * selector - selector value + * + * Output Args: + * segp - Pointer to KVM segment + * + * Return: None + * + * Sets up the KVM segment pointed to by segp, to be a code segment + * with the selector value given by selector. + */ +static void kvm_seg_set_kernel_code_64bit(struct kvm_vm *vm, uint16_t selector, + struct kvm_segment *segp) +{ + memset(segp, 0, sizeof(*segp)); + segp->selector = selector; + segp->limit = 0xFFFFFFFFu; + segp->s = 0x1; /* kTypeCodeData */ + segp->type = 0x08 | 0x01 | 0x02; /* kFlagCode | kFlagCodeAccessed + * | kFlagCodeReadable + */ + segp->g = true; + segp->l = true; + segp->present = 1; + if (vm) + kvm_seg_fill_gdt_64bit(vm, segp); +} + +/* Set Long Mode Flat Kernel Data Segment + * + * Input Args: + * vm - VM whose GDT is being filled, or NULL to only write segp + * selector - selector value + * + * Output Args: + * segp - Pointer to KVM segment + * + * Return: None + * + * Sets up the KVM segment pointed to by segp, to be a data segment + * with the selector value given by selector. + */ +static void kvm_seg_set_kernel_data_64bit(struct kvm_vm *vm, uint16_t selector, + struct kvm_segment *segp) +{ + memset(segp, 0, sizeof(*segp)); + segp->selector = selector; + segp->limit = 0xFFFFFFFFu; + segp->s = 0x1; /* kTypeCodeData */ + segp->type = 0x00 | 0x01 | 0x02; /* kFlagData | kFlagDataAccessed + * | kFlagDataWritable + */ + segp->g = true; + segp->present = true; + if (vm) + kvm_seg_fill_gdt_64bit(vm, segp); +} + +/* Address Guest Virtual to Guest Physical + * + * Input Args: + * vm - Virtual Machine + * gpa - VM virtual address + * + * Output Args: None + * + * Return: + * Equivalent VM physical address + * + * Translates the VM virtual address given by gva to a VM physical + * address and then locates the memory region containing the VM + * physical address, within the VM given by vm. When found, the host + * virtual address providing the memory to the vm physical address is returned. + * A TEST_ASSERT failure occurs if no region containing translated + * VM virtual address exists. + */ +vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) +{ + uint16_t index[4]; + struct pageMapL4Entry *pml4e; + struct pageDirectoryPointerEntry *pdpe; + struct pageDirectoryEntry *pde; + struct pageTableEntry *pte; + void *hva; + + TEST_ASSERT(vm->mode == VM_MODE_FLAT48PG, "Attempt to use " + "unknown or unsupported guest mode, mode: 0x%x", vm->mode); + + index[0] = (gva >> 12) & 0x1ffu; + index[1] = (gva >> 21) & 0x1ffu; + index[2] = (gva >> 30) & 0x1ffu; + index[3] = (gva >> 39) & 0x1ffu; + + if (!vm->pgd_created) + goto unmapped_gva; + pml4e = addr_gpa2hva(vm, vm->pgd); + if (!pml4e[index[3]].present) + goto unmapped_gva; + + pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size); + if (!pdpe[index[2]].present) + goto unmapped_gva; + + pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size); + if (!pde[index[1]].present) + goto unmapped_gva; + + pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size); + if (!pte[index[0]].present) + goto unmapped_gva; + + return (pte[index[0]].address * vm->page_size) + (gva & 0xfffu); + +unmapped_gva: + TEST_ASSERT(false, "No mapping for vm virtual address, " + "gva: 0x%lx", gva); +} + +static void kvm_setup_gdt(struct kvm_vm *vm, struct kvm_dtable *dt, int gdt_memslot, + int pgd_memslot) +{ + if (!vm->gdt) + vm->gdt = vm_vaddr_alloc(vm, getpagesize(), + KVM_UTIL_MIN_VADDR, gdt_memslot, pgd_memslot); + + dt->base = vm->gdt; + dt->limit = getpagesize(); +} + +static void kvm_setup_tss_64bit(struct kvm_vm *vm, struct kvm_segment *segp, + int selector, int gdt_memslot, + int pgd_memslot) +{ + if (!vm->tss) + vm->tss = vm_vaddr_alloc(vm, getpagesize(), + KVM_UTIL_MIN_VADDR, gdt_memslot, pgd_memslot); + + memset(segp, 0, sizeof(*segp)); + segp->base = vm->tss; + segp->limit = 0x67; + segp->selector = selector; + segp->type = 0xb; + segp->present = 1; + kvm_seg_fill_gdt_64bit(vm, segp); +} + +void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_memslot) +{ + struct kvm_sregs sregs; + + /* Set mode specific system register values. */ + vcpu_sregs_get(vm, vcpuid, &sregs); + + sregs.idt.limit = 0; + + kvm_setup_gdt(vm, &sregs.gdt, gdt_memslot, pgd_memslot); + + switch (vm->mode) { + case VM_MODE_FLAT48PG: + sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG; + sregs.cr4 |= X86_CR4_PAE | X86_CR4_OSFXSR; + sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX); + + kvm_seg_set_unusable(&sregs.ldt); + kvm_seg_set_kernel_code_64bit(vm, 0x8, &sregs.cs); + kvm_seg_set_kernel_data_64bit(vm, 0x10, &sregs.ds); + kvm_seg_set_kernel_data_64bit(vm, 0x10, &sregs.es); + kvm_setup_tss_64bit(vm, &sregs.tr, 0x18, gdt_memslot, pgd_memslot); + break; + + default: + TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", vm->mode); + } + + sregs.cr3 = vm->pgd; + vcpu_sregs_set(vm, vcpuid, &sregs); +} +/* Adds a vCPU with reasonable defaults (i.e., a stack) + * + * Input Args: + * vcpuid - The id of the VCPU to add to the VM. + * guest_code - The vCPU's entry point + */ +void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code) +{ + struct kvm_mp_state mp_state; + struct kvm_regs regs; + vm_vaddr_t stack_vaddr; + stack_vaddr = vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(), + DEFAULT_GUEST_STACK_VADDR_MIN, 0, 0); + + /* Create VCPU */ + vm_vcpu_add(vm, vcpuid, 0, 0); + + /* Setup guest general purpose registers */ + vcpu_regs_get(vm, vcpuid, ®s); + regs.rflags = regs.rflags | 0x2; + regs.rsp = stack_vaddr + (DEFAULT_STACK_PGS * getpagesize()); + regs.rip = (unsigned long) guest_code; + vcpu_regs_set(vm, vcpuid, ®s); + + /* Setup the MP state */ + mp_state.mp_state = 0; + vcpu_set_mp_state(vm, vcpuid, &mp_state); +} + +/* VM VCPU CPUID Set + * + * Input Args: + * vm - Virtual Machine + * vcpuid - VCPU id + * cpuid - The CPUID values to set. + * + * Output Args: None + * + * Return: void + * + * Set the VCPU's CPUID. + */ +void vcpu_set_cpuid(struct kvm_vm *vm, + uint32_t vcpuid, struct kvm_cpuid2 *cpuid) +{ + struct vcpu *vcpu = vcpu_find(vm, vcpuid); + int rc; + + TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); + + rc = ioctl(vcpu->fd, KVM_SET_CPUID2, cpuid); + TEST_ASSERT(rc == 0, "KVM_SET_CPUID2 failed, rc: %i errno: %i", + rc, errno); + +} +/* Create a VM with reasonable defaults + * + * Input Args: + * vcpuid - The id of the single VCPU to add to the VM. + * extra_mem_pages - The size of extra memories to add (this will + * decide how much extra space we will need to + * setup the page tables using mem slot 0) + * guest_code - The vCPU's entry point + * + * Output Args: None + * + * Return: + * Pointer to opaque structure that describes the created VM. + */ +struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages, + void *guest_code) +{ + struct kvm_vm *vm; + /* + * For x86 the maximum page table size for a memory region + * will be when only 4K pages are used. In that case the + * total extra size for page tables (for extra N pages) will + * be: N/512+N/512^2+N/512^3+... which is definitely smaller + * than N/512*2. + */ + uint64_t extra_pg_pages = extra_mem_pages / 512 * 2; + + /* Create VM */ + vm = vm_create(VM_MODE_FLAT48PG, + DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, + O_RDWR); + + /* Setup guest code */ + kvm_vm_elf_load(vm, program_invocation_name, 0, 0); + + /* Setup IRQ Chip */ + vm_create_irqchip(vm); + + /* Add the first vCPU. */ + vm_vcpu_add_default(vm, vcpuid, guest_code); + + return vm; +} + +struct kvm_x86_state { + struct kvm_vcpu_events events; + struct kvm_mp_state mp_state; + struct kvm_regs regs; + struct kvm_xsave xsave; + struct kvm_xcrs xcrs; + struct kvm_sregs sregs; + struct kvm_debugregs debugregs; + union { + struct kvm_nested_state nested; + char nested_[16384]; + }; + struct kvm_msrs msrs; +}; + +static int kvm_get_num_msrs(struct kvm_vm *vm) +{ + struct kvm_msr_list nmsrs; + int r; + + nmsrs.nmsrs = 0; + r = ioctl(vm->kvm_fd, KVM_GET_MSR_INDEX_LIST, &nmsrs); + TEST_ASSERT(r == -1 && errno == E2BIG, "Unexpected result from KVM_GET_MSR_INDEX_LIST probe, r: %i", + r); + + return nmsrs.nmsrs; +} + +struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid) +{ + struct vcpu *vcpu = vcpu_find(vm, vcpuid); + struct kvm_msr_list *list; + struct kvm_x86_state *state; + int nmsrs, r, i; + static int nested_size = -1; + + if (nested_size == -1) { + nested_size = kvm_check_cap(KVM_CAP_NESTED_STATE); + TEST_ASSERT(nested_size <= sizeof(state->nested_), + "Nested state size too big, %i > %zi", + nested_size, sizeof(state->nested_)); + } + + nmsrs = kvm_get_num_msrs(vm); + list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0])); + list->nmsrs = nmsrs; + r = ioctl(vm->kvm_fd, KVM_GET_MSR_INDEX_LIST, list); + TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MSR_INDEX_LIST, r: %i", + r); + + state = malloc(sizeof(*state) + nmsrs * sizeof(state->msrs.entries[0])); + r = ioctl(vcpu->fd, KVM_GET_VCPU_EVENTS, &state->events); + TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_VCPU_EVENTS, r: %i", + r); + + r = ioctl(vcpu->fd, KVM_GET_MP_STATE, &state->mp_state); + TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MP_STATE, r: %i", + r); + + r = ioctl(vcpu->fd, KVM_GET_REGS, &state->regs); + TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_REGS, r: %i", + r); + + r = ioctl(vcpu->fd, KVM_GET_XSAVE, &state->xsave); + TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_XSAVE, r: %i", + r); + + r = ioctl(vcpu->fd, KVM_GET_XCRS, &state->xcrs); + TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_XCRS, r: %i", + r); + + r = ioctl(vcpu->fd, KVM_GET_SREGS, &state->sregs); + TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_SREGS, r: %i", + r); + + if (nested_size) { + state->nested.size = sizeof(state->nested_); + r = ioctl(vcpu->fd, KVM_GET_NESTED_STATE, &state->nested); + TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_NESTED_STATE, r: %i", + r); + TEST_ASSERT(state->nested.size <= nested_size, + "Nested state size too big, %i (KVM_CHECK_CAP gave %i)", + state->nested.size, nested_size); + } else + state->nested.size = 0; + + state->msrs.nmsrs = nmsrs; + for (i = 0; i < nmsrs; i++) + state->msrs.entries[i].index = list->indices[i]; + r = ioctl(vcpu->fd, KVM_GET_MSRS, &state->msrs); + TEST_ASSERT(r == nmsrs, "Unexpected result from KVM_GET_MSRS, r: %i (failed at %x)", + r, r == nmsrs ? -1 : list->indices[r]); + + r = ioctl(vcpu->fd, KVM_GET_DEBUGREGS, &state->debugregs); + TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_DEBUGREGS, r: %i", + r); + + free(list); + return state; +} + +void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *state) +{ + struct vcpu *vcpu = vcpu_find(vm, vcpuid); + int r; + + if (state->nested.size) { + r = ioctl(vcpu->fd, KVM_SET_NESTED_STATE, &state->nested); + TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_NESTED_STATE, r: %i", + r); + } + + r = ioctl(vcpu->fd, KVM_SET_XSAVE, &state->xsave); + TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XSAVE, r: %i", + r); + + r = ioctl(vcpu->fd, KVM_SET_XCRS, &state->xcrs); + TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XCRS, r: %i", + r); + + r = ioctl(vcpu->fd, KVM_SET_SREGS, &state->sregs); + TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_SREGS, r: %i", + r); + + r = ioctl(vcpu->fd, KVM_SET_MSRS, &state->msrs); + TEST_ASSERT(r == state->msrs.nmsrs, "Unexpected result from KVM_SET_MSRS, r: %i (failed at %x)", + r, r == state->msrs.nmsrs ? -1 : state->msrs.entries[r].index); + + r = ioctl(vcpu->fd, KVM_SET_VCPU_EVENTS, &state->events); + TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_VCPU_EVENTS, r: %i", + r); + + r = ioctl(vcpu->fd, KVM_SET_MP_STATE, &state->mp_state); + TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_MP_STATE, r: %i", + r); + + r = ioctl(vcpu->fd, KVM_SET_DEBUGREGS, &state->debugregs); + TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_DEBUGREGS, r: %i", + r); + + r = ioctl(vcpu->fd, KVM_SET_REGS, &state->regs); + TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_REGS, r: %i", + r); +} diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c new file mode 100644 index 000000000000..d7c401472247 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c @@ -0,0 +1,283 @@ +/* + * tools/testing/selftests/kvm/lib/x86_64/vmx.c + * + * Copyright (C) 2018, Google LLC. + * + * This work is licensed under the terms of the GNU GPL, version 2. + */ + +#define _GNU_SOURCE /* for program_invocation_name */ + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "vmx.h" + +/* Allocate memory regions for nested VMX tests. + * + * Input Args: + * vm - The VM to allocate guest-virtual addresses in. + * + * Output Args: + * p_vmx_gva - The guest virtual address for the struct vmx_pages. + * + * Return: + * Pointer to structure with the addresses of the VMX areas. + */ +struct vmx_pages * +vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva) +{ + vm_vaddr_t vmx_gva = vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0); + struct vmx_pages *vmx = addr_gva2hva(vm, vmx_gva); + + /* Setup of a region of guest memory for the vmxon region. */ + vmx->vmxon = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0); + vmx->vmxon_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmxon); + vmx->vmxon_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmxon); + + /* Setup of a region of guest memory for a vmcs. */ + vmx->vmcs = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0); + vmx->vmcs_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmcs); + vmx->vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmcs); + + /* Setup of a region of guest memory for the MSR bitmap. */ + vmx->msr = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0); + vmx->msr_hva = addr_gva2hva(vm, (uintptr_t)vmx->msr); + vmx->msr_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->msr); + memset(vmx->msr_hva, 0, getpagesize()); + + /* Setup of a region of guest memory for the shadow VMCS. */ + vmx->shadow_vmcs = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0); + vmx->shadow_vmcs_hva = addr_gva2hva(vm, (uintptr_t)vmx->shadow_vmcs); + vmx->shadow_vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->shadow_vmcs); + + /* Setup of a region of guest memory for the VMREAD and VMWRITE bitmaps. */ + vmx->vmread = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0); + vmx->vmread_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmread); + vmx->vmread_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmread); + memset(vmx->vmread_hva, 0, getpagesize()); + + vmx->vmwrite = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0); + vmx->vmwrite_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmwrite); + vmx->vmwrite_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmwrite); + memset(vmx->vmwrite_hva, 0, getpagesize()); + + *p_vmx_gva = vmx_gva; + return vmx; +} + +bool prepare_for_vmx_operation(struct vmx_pages *vmx) +{ + uint64_t feature_control; + uint64_t required; + unsigned long cr0; + unsigned long cr4; + + /* + * Ensure bits in CR0 and CR4 are valid in VMX operation: + * - Bit X is 1 in _FIXED0: bit X is fixed to 1 in CRx. + * - Bit X is 0 in _FIXED1: bit X is fixed to 0 in CRx. + */ + __asm__ __volatile__("mov %%cr0, %0" : "=r"(cr0) : : "memory"); + cr0 &= rdmsr(MSR_IA32_VMX_CR0_FIXED1); + cr0 |= rdmsr(MSR_IA32_VMX_CR0_FIXED0); + __asm__ __volatile__("mov %0, %%cr0" : : "r"(cr0) : "memory"); + + __asm__ __volatile__("mov %%cr4, %0" : "=r"(cr4) : : "memory"); + cr4 &= rdmsr(MSR_IA32_VMX_CR4_FIXED1); + cr4 |= rdmsr(MSR_IA32_VMX_CR4_FIXED0); + /* Enable VMX operation */ + cr4 |= X86_CR4_VMXE; + __asm__ __volatile__("mov %0, %%cr4" : : "r"(cr4) : "memory"); + + /* + * Configure IA32_FEATURE_CONTROL MSR to allow VMXON: + * Bit 0: Lock bit. If clear, VMXON causes a #GP. + * Bit 2: Enables VMXON outside of SMX operation. If clear, VMXON + * outside of SMX causes a #GP. + */ + required = FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; + required |= FEATURE_CONTROL_LOCKED; + feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL); + if ((feature_control & required) != required) + wrmsr(MSR_IA32_FEATURE_CONTROL, feature_control | required); + + /* Enter VMX root operation. */ + *(uint32_t *)(vmx->vmxon) = vmcs_revision(); + if (vmxon(vmx->vmxon_gpa)) + return false; + + /* Load a VMCS. */ + *(uint32_t *)(vmx->vmcs) = vmcs_revision(); + if (vmclear(vmx->vmcs_gpa)) + return false; + + if (vmptrld(vmx->vmcs_gpa)) + return false; + + /* Setup shadow VMCS, do not load it yet. */ + *(uint32_t *)(vmx->shadow_vmcs) = vmcs_revision() | 0x80000000ul; + if (vmclear(vmx->shadow_vmcs_gpa)) + return false; + + return true; +} + +/* + * Initialize the control fields to the most basic settings possible. + */ +static inline void init_vmcs_control_fields(struct vmx_pages *vmx) +{ + vmwrite(VIRTUAL_PROCESSOR_ID, 0); + vmwrite(POSTED_INTR_NV, 0); + + vmwrite(PIN_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_TRUE_PINBASED_CTLS)); + if (!vmwrite(SECONDARY_VM_EXEC_CONTROL, 0)) + vmwrite(CPU_BASED_VM_EXEC_CONTROL, + rdmsr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS) | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS); + else + vmwrite(CPU_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS)); + vmwrite(EXCEPTION_BITMAP, 0); + vmwrite(PAGE_FAULT_ERROR_CODE_MASK, 0); + vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, -1); /* Never match */ + vmwrite(CR3_TARGET_COUNT, 0); + vmwrite(VM_EXIT_CONTROLS, rdmsr(MSR_IA32_VMX_EXIT_CTLS) | + VM_EXIT_HOST_ADDR_SPACE_SIZE); /* 64-bit host */ + vmwrite(VM_EXIT_MSR_STORE_COUNT, 0); + vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0); + vmwrite(VM_ENTRY_CONTROLS, rdmsr(MSR_IA32_VMX_ENTRY_CTLS) | + VM_ENTRY_IA32E_MODE); /* 64-bit guest */ + vmwrite(VM_ENTRY_MSR_LOAD_COUNT, 0); + vmwrite(VM_ENTRY_INTR_INFO_FIELD, 0); + vmwrite(TPR_THRESHOLD, 0); + + vmwrite(CR0_GUEST_HOST_MASK, 0); + vmwrite(CR4_GUEST_HOST_MASK, 0); + vmwrite(CR0_READ_SHADOW, get_cr0()); + vmwrite(CR4_READ_SHADOW, get_cr4()); + + vmwrite(MSR_BITMAP, vmx->msr_gpa); + vmwrite(VMREAD_BITMAP, vmx->vmread_gpa); + vmwrite(VMWRITE_BITMAP, vmx->vmwrite_gpa); +} + +/* + * Initialize the host state fields based on the current host state, with + * the exception of HOST_RSP and HOST_RIP, which should be set by vmlaunch + * or vmresume. + */ +static inline void init_vmcs_host_state(void) +{ + uint32_t exit_controls = vmreadz(VM_EXIT_CONTROLS); + + vmwrite(HOST_ES_SELECTOR, get_es()); + vmwrite(HOST_CS_SELECTOR, get_cs()); + vmwrite(HOST_SS_SELECTOR, get_ss()); + vmwrite(HOST_DS_SELECTOR, get_ds()); + vmwrite(HOST_FS_SELECTOR, get_fs()); + vmwrite(HOST_GS_SELECTOR, get_gs()); + vmwrite(HOST_TR_SELECTOR, get_tr()); + + if (exit_controls & VM_EXIT_LOAD_IA32_PAT) + vmwrite(HOST_IA32_PAT, rdmsr(MSR_IA32_CR_PAT)); + if (exit_controls & VM_EXIT_LOAD_IA32_EFER) + vmwrite(HOST_IA32_EFER, rdmsr(MSR_EFER)); + if (exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) + vmwrite(HOST_IA32_PERF_GLOBAL_CTRL, + rdmsr(MSR_CORE_PERF_GLOBAL_CTRL)); + + vmwrite(HOST_IA32_SYSENTER_CS, rdmsr(MSR_IA32_SYSENTER_CS)); + + vmwrite(HOST_CR0, get_cr0()); + vmwrite(HOST_CR3, get_cr3()); + vmwrite(HOST_CR4, get_cr4()); + vmwrite(HOST_FS_BASE, rdmsr(MSR_FS_BASE)); + vmwrite(HOST_GS_BASE, rdmsr(MSR_GS_BASE)); + vmwrite(HOST_TR_BASE, + get_desc64_base((struct desc64 *)(get_gdt_base() + get_tr()))); + vmwrite(HOST_GDTR_BASE, get_gdt_base()); + vmwrite(HOST_IDTR_BASE, get_idt_base()); + vmwrite(HOST_IA32_SYSENTER_ESP, rdmsr(MSR_IA32_SYSENTER_ESP)); + vmwrite(HOST_IA32_SYSENTER_EIP, rdmsr(MSR_IA32_SYSENTER_EIP)); +} + +/* + * Initialize the guest state fields essentially as a clone of + * the host state fields. Some host state fields have fixed + * values, and we set the corresponding guest state fields accordingly. + */ +static inline void init_vmcs_guest_state(void *rip, void *rsp) +{ + vmwrite(GUEST_ES_SELECTOR, vmreadz(HOST_ES_SELECTOR)); + vmwrite(GUEST_CS_SELECTOR, vmreadz(HOST_CS_SELECTOR)); + vmwrite(GUEST_SS_SELECTOR, vmreadz(HOST_SS_SELECTOR)); + vmwrite(GUEST_DS_SELECTOR, vmreadz(HOST_DS_SELECTOR)); + vmwrite(GUEST_FS_SELECTOR, vmreadz(HOST_FS_SELECTOR)); + vmwrite(GUEST_GS_SELECTOR, vmreadz(HOST_GS_SELECTOR)); + vmwrite(GUEST_LDTR_SELECTOR, 0); + vmwrite(GUEST_TR_SELECTOR, vmreadz(HOST_TR_SELECTOR)); + vmwrite(GUEST_INTR_STATUS, 0); + vmwrite(GUEST_PML_INDEX, 0); + + vmwrite(VMCS_LINK_POINTER, -1ll); + vmwrite(GUEST_IA32_DEBUGCTL, 0); + vmwrite(GUEST_IA32_PAT, vmreadz(HOST_IA32_PAT)); + vmwrite(GUEST_IA32_EFER, vmreadz(HOST_IA32_EFER)); + vmwrite(GUEST_IA32_PERF_GLOBAL_CTRL, + vmreadz(HOST_IA32_PERF_GLOBAL_CTRL)); + + vmwrite(GUEST_ES_LIMIT, -1); + vmwrite(GUEST_CS_LIMIT, -1); + vmwrite(GUEST_SS_LIMIT, -1); + vmwrite(GUEST_DS_LIMIT, -1); + vmwrite(GUEST_FS_LIMIT, -1); + vmwrite(GUEST_GS_LIMIT, -1); + vmwrite(GUEST_LDTR_LIMIT, -1); + vmwrite(GUEST_TR_LIMIT, 0x67); + vmwrite(GUEST_GDTR_LIMIT, 0xffff); + vmwrite(GUEST_IDTR_LIMIT, 0xffff); + vmwrite(GUEST_ES_AR_BYTES, + vmreadz(GUEST_ES_SELECTOR) == 0 ? 0x10000 : 0xc093); + vmwrite(GUEST_CS_AR_BYTES, 0xa09b); + vmwrite(GUEST_SS_AR_BYTES, 0xc093); + vmwrite(GUEST_DS_AR_BYTES, + vmreadz(GUEST_DS_SELECTOR) == 0 ? 0x10000 : 0xc093); + vmwrite(GUEST_FS_AR_BYTES, + vmreadz(GUEST_FS_SELECTOR) == 0 ? 0x10000 : 0xc093); + vmwrite(GUEST_GS_AR_BYTES, + vmreadz(GUEST_GS_SELECTOR) == 0 ? 0x10000 : 0xc093); + vmwrite(GUEST_LDTR_AR_BYTES, 0x10000); + vmwrite(GUEST_TR_AR_BYTES, 0x8b); + vmwrite(GUEST_INTERRUPTIBILITY_INFO, 0); + vmwrite(GUEST_ACTIVITY_STATE, 0); + vmwrite(GUEST_SYSENTER_CS, vmreadz(HOST_IA32_SYSENTER_CS)); + vmwrite(VMX_PREEMPTION_TIMER_VALUE, 0); + + vmwrite(GUEST_CR0, vmreadz(HOST_CR0)); + vmwrite(GUEST_CR3, vmreadz(HOST_CR3)); + vmwrite(GUEST_CR4, vmreadz(HOST_CR4)); + vmwrite(GUEST_ES_BASE, 0); + vmwrite(GUEST_CS_BASE, 0); + vmwrite(GUEST_SS_BASE, 0); + vmwrite(GUEST_DS_BASE, 0); + vmwrite(GUEST_FS_BASE, vmreadz(HOST_FS_BASE)); + vmwrite(GUEST_GS_BASE, vmreadz(HOST_GS_BASE)); + vmwrite(GUEST_LDTR_BASE, 0); + vmwrite(GUEST_TR_BASE, vmreadz(HOST_TR_BASE)); + vmwrite(GUEST_GDTR_BASE, vmreadz(HOST_GDTR_BASE)); + vmwrite(GUEST_IDTR_BASE, vmreadz(HOST_IDTR_BASE)); + vmwrite(GUEST_DR7, 0x400); + vmwrite(GUEST_RSP, (uint64_t)rsp); + vmwrite(GUEST_RIP, (uint64_t)rip); + vmwrite(GUEST_RFLAGS, 2); + vmwrite(GUEST_PENDING_DBG_EXCEPTIONS, 0); + vmwrite(GUEST_SYSENTER_ESP, vmreadz(HOST_IA32_SYSENTER_ESP)); + vmwrite(GUEST_SYSENTER_EIP, vmreadz(HOST_IA32_SYSENTER_EIP)); +} + +void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp) +{ + init_vmcs_control_fields(vmx); + init_vmcs_host_state(); + init_vmcs_guest_state(guest_rip, guest_rsp); +} diff --git a/tools/testing/selftests/kvm/platform_info_test.c b/tools/testing/selftests/kvm/platform_info_test.c deleted file mode 100644 index aa6a14331461..000000000000 --- a/tools/testing/selftests/kvm/platform_info_test.c +++ /dev/null @@ -1,110 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Test for x86 KVM_CAP_MSR_PLATFORM_INFO - * - * Copyright (C) 2018, Google LLC. - * - * This work is licensed under the terms of the GNU GPL, version 2. - * - * Verifies expected behavior of controlling guest access to - * MSR_PLATFORM_INFO. - */ - -#define _GNU_SOURCE /* for program_invocation_short_name */ -#include -#include -#include -#include -#include - -#include "test_util.h" -#include "kvm_util.h" -#include "x86.h" - -#define VCPU_ID 0 -#define MSR_PLATFORM_INFO_MAX_TURBO_RATIO 0xff00 - -static void guest_code(void) -{ - uint64_t msr_platform_info; - - for (;;) { - msr_platform_info = rdmsr(MSR_PLATFORM_INFO); - GUEST_SYNC(msr_platform_info); - asm volatile ("inc %r11"); - } -} - -static void set_msr_platform_info_enabled(struct kvm_vm *vm, bool enable) -{ - struct kvm_enable_cap cap = {}; - - cap.cap = KVM_CAP_MSR_PLATFORM_INFO; - cap.flags = 0; - cap.args[0] = (int)enable; - vm_enable_cap(vm, &cap); -} - -static void test_msr_platform_info_enabled(struct kvm_vm *vm) -{ - struct kvm_run *run = vcpu_state(vm, VCPU_ID); - struct ucall uc; - - set_msr_platform_info_enabled(vm, true); - vcpu_run(vm, VCPU_ID); - TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, - "Exit_reason other than KVM_EXIT_IO: %u (%s),\n", - run->exit_reason, - exit_reason_str(run->exit_reason)); - get_ucall(vm, VCPU_ID, &uc); - TEST_ASSERT(uc.cmd == UCALL_SYNC, - "Received ucall other than UCALL_SYNC: %u\n", - ucall); - TEST_ASSERT((uc.args[1] & MSR_PLATFORM_INFO_MAX_TURBO_RATIO) == - MSR_PLATFORM_INFO_MAX_TURBO_RATIO, - "Expected MSR_PLATFORM_INFO to have max turbo ratio mask: %i.", - MSR_PLATFORM_INFO_MAX_TURBO_RATIO); -} - -static void test_msr_platform_info_disabled(struct kvm_vm *vm) -{ - struct kvm_run *run = vcpu_state(vm, VCPU_ID); - - set_msr_platform_info_enabled(vm, false); - vcpu_run(vm, VCPU_ID); - TEST_ASSERT(run->exit_reason == KVM_EXIT_SHUTDOWN, - "Exit_reason other than KVM_EXIT_SHUTDOWN: %u (%s)\n", - run->exit_reason, - exit_reason_str(run->exit_reason)); -} - -int main(int argc, char *argv[]) -{ - struct kvm_vm *vm; - struct kvm_run *state; - int rv; - uint64_t msr_platform_info; - - /* Tell stdout not to buffer its content */ - setbuf(stdout, NULL); - - rv = kvm_check_cap(KVM_CAP_MSR_PLATFORM_INFO); - if (!rv) { - fprintf(stderr, - "KVM_CAP_MSR_PLATFORM_INFO not supported, skip test\n"); - exit(KSFT_SKIP); - } - - vm = vm_create_default(VCPU_ID, 0, guest_code); - - msr_platform_info = vcpu_get_msr(vm, VCPU_ID, MSR_PLATFORM_INFO); - vcpu_set_msr(vm, VCPU_ID, MSR_PLATFORM_INFO, - msr_platform_info | MSR_PLATFORM_INFO_MAX_TURBO_RATIO); - test_msr_platform_info_disabled(vm); - test_msr_platform_info_enabled(vm); - vcpu_set_msr(vm, VCPU_ID, MSR_PLATFORM_INFO, msr_platform_info); - - kvm_vm_free(vm); - - return 0; -} diff --git a/tools/testing/selftests/kvm/set_sregs_test.c b/tools/testing/selftests/kvm/set_sregs_test.c deleted file mode 100644 index 881419d5746e..000000000000 --- a/tools/testing/selftests/kvm/set_sregs_test.c +++ /dev/null @@ -1,54 +0,0 @@ -/* - * KVM_SET_SREGS tests - * - * Copyright (C) 2018, Google LLC. - * - * This work is licensed under the terms of the GNU GPL, version 2. - * - * This is a regression test for the bug fixed by the following commit: - * d3802286fa0f ("kvm: x86: Disallow illegal IA32_APIC_BASE MSR values") - * - * That bug allowed a user-mode program that called the KVM_SET_SREGS - * ioctl to put a VCPU's local APIC into an invalid state. - * - */ -#define _GNU_SOURCE /* for program_invocation_short_name */ -#include -#include -#include -#include -#include - -#include "test_util.h" - -#include "kvm_util.h" -#include "x86.h" - -#define VCPU_ID 5 - -int main(int argc, char *argv[]) -{ - struct kvm_sregs sregs; - struct kvm_vm *vm; - int rc; - - /* Tell stdout not to buffer its content */ - setbuf(stdout, NULL); - - /* Create VM */ - vm = vm_create_default(VCPU_ID, 0, NULL); - - vcpu_sregs_get(vm, VCPU_ID, &sregs); - sregs.apic_base = 1 << 10; - rc = _vcpu_sregs_set(vm, VCPU_ID, &sregs); - TEST_ASSERT(rc, "Set IA32_APIC_BASE to %llx (invalid)", - sregs.apic_base); - sregs.apic_base = 1 << 11; - rc = _vcpu_sregs_set(vm, VCPU_ID, &sregs); - TEST_ASSERT(!rc, "Couldn't set IA32_APIC_BASE to %llx (valid)", - sregs.apic_base); - - kvm_vm_free(vm); - - return 0; -} diff --git a/tools/testing/selftests/kvm/state_test.c b/tools/testing/selftests/kvm/state_test.c deleted file mode 100644 index cdf82735d6e7..000000000000 --- a/tools/testing/selftests/kvm/state_test.c +++ /dev/null @@ -1,197 +0,0 @@ -/* - * KVM_GET/SET_* tests - * - * Copyright (C) 2018, Red Hat, Inc. - * - * This work is licensed under the terms of the GNU GPL, version 2. - * - * Tests for vCPU state save/restore, including nested guest state. - */ -#define _GNU_SOURCE /* for program_invocation_short_name */ -#include -#include -#include -#include -#include - -#include "test_util.h" - -#include "kvm_util.h" -#include "x86.h" -#include "vmx.h" - -#define VCPU_ID 5 - -static bool have_nested_state; - -void l2_guest_code(void) -{ - GUEST_SYNC(5); - - /* Exit to L1 */ - vmcall(); - - /* L1 has now set up a shadow VMCS for us. */ - GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee); - GUEST_SYNC(9); - GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee); - GUEST_ASSERT(!vmwrite(GUEST_RIP, 0xc0fffee)); - GUEST_SYNC(10); - GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0fffee); - GUEST_ASSERT(!vmwrite(GUEST_RIP, 0xc0ffffee)); - GUEST_SYNC(11); - - /* Done, exit to L1 and never come back. */ - vmcall(); -} - -void l1_guest_code(struct vmx_pages *vmx_pages) -{ -#define L2_GUEST_STACK_SIZE 64 - unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; - - GUEST_ASSERT(vmx_pages->vmcs_gpa); - GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); - GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa); - - GUEST_SYNC(3); - GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa); - - prepare_vmcs(vmx_pages, l2_guest_code, - &l2_guest_stack[L2_GUEST_STACK_SIZE]); - - GUEST_SYNC(4); - GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa); - GUEST_ASSERT(!vmlaunch()); - GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa); - GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); - - /* Check that the launched state is preserved. */ - GUEST_ASSERT(vmlaunch()); - - GUEST_ASSERT(!vmresume()); - GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); - - GUEST_SYNC(6); - GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); - - GUEST_ASSERT(!vmresume()); - GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); - - vmwrite(GUEST_RIP, vmreadz(GUEST_RIP) + 3); - - vmwrite(SECONDARY_VM_EXEC_CONTROL, SECONDARY_EXEC_SHADOW_VMCS); - vmwrite(VMCS_LINK_POINTER, vmx_pages->shadow_vmcs_gpa); - - GUEST_ASSERT(!vmptrld(vmx_pages->shadow_vmcs_gpa)); - GUEST_ASSERT(vmlaunch()); - GUEST_SYNC(7); - GUEST_ASSERT(vmlaunch()); - GUEST_ASSERT(vmresume()); - - vmwrite(GUEST_RIP, 0xc0ffee); - GUEST_SYNC(8); - GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee); - - GUEST_ASSERT(!vmptrld(vmx_pages->vmcs_gpa)); - GUEST_ASSERT(!vmresume()); - GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); - - GUEST_ASSERT(!vmptrld(vmx_pages->shadow_vmcs_gpa)); - GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffffee); - GUEST_ASSERT(vmlaunch()); - GUEST_ASSERT(vmresume()); - GUEST_SYNC(12); - GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffffee); - GUEST_ASSERT(vmlaunch()); - GUEST_ASSERT(vmresume()); -} - -void guest_code(struct vmx_pages *vmx_pages) -{ - GUEST_SYNC(1); - GUEST_SYNC(2); - - if (vmx_pages) - l1_guest_code(vmx_pages); - - GUEST_DONE(); -} - -int main(int argc, char *argv[]) -{ - struct vmx_pages *vmx_pages = NULL; - vm_vaddr_t vmx_pages_gva = 0; - - struct kvm_regs regs1, regs2; - struct kvm_vm *vm; - struct kvm_run *run; - struct kvm_x86_state *state; - struct ucall uc; - int stage; - - struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1); - - /* Create VM */ - vm = vm_create_default(VCPU_ID, 0, guest_code); - vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); - run = vcpu_state(vm, VCPU_ID); - - vcpu_regs_get(vm, VCPU_ID, ®s1); - - if (kvm_check_cap(KVM_CAP_NESTED_STATE)) { - vmx_pages = vcpu_alloc_vmx(vm, &vmx_pages_gva); - vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva); - } else { - printf("will skip nested state checks\n"); - vcpu_args_set(vm, VCPU_ID, 1, 0); - } - - for (stage = 1;; stage++) { - _vcpu_run(vm, VCPU_ID); - TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, - "Unexpected exit reason: %u (%s),\n", - run->exit_reason, - exit_reason_str(run->exit_reason)); - - memset(®s1, 0, sizeof(regs1)); - vcpu_regs_get(vm, VCPU_ID, ®s1); - switch (get_ucall(vm, VCPU_ID, &uc)) { - case UCALL_ABORT: - TEST_ASSERT(false, "%s at %s:%d", (const char *)uc.args[0], - __FILE__, uc.args[1]); - /* NOT REACHED */ - case UCALL_SYNC: - break; - case UCALL_DONE: - goto done; - default: - TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd); - } - - /* UCALL_SYNC is handled here. */ - TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") && - uc.args[1] == stage, "Unexpected register values vmexit #%lx, got %lx", - stage, (ulong)uc.args[1]); - - state = vcpu_save_state(vm, VCPU_ID); - kvm_vm_release(vm); - - /* Restore state in a new VM. */ - kvm_vm_restart(vm, O_RDWR); - vm_vcpu_add(vm, VCPU_ID, 0, 0); - vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); - vcpu_load_state(vm, VCPU_ID, state); - run = vcpu_state(vm, VCPU_ID); - free(state); - - memset(®s2, 0, sizeof(regs2)); - vcpu_regs_get(vm, VCPU_ID, ®s2); - TEST_ASSERT(!memcmp(®s1, ®s2, sizeof(regs2)), - "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx", - (ulong) regs2.rdi, (ulong) regs2.rsi); - } - -done: - kvm_vm_free(vm); -} diff --git a/tools/testing/selftests/kvm/sync_regs_test.c b/tools/testing/selftests/kvm/sync_regs_test.c deleted file mode 100644 index 213343e5dff9..000000000000 --- a/tools/testing/selftests/kvm/sync_regs_test.c +++ /dev/null @@ -1,237 +0,0 @@ -/* - * Test for x86 KVM_CAP_SYNC_REGS - * - * Copyright (C) 2018, Google LLC. - * - * This work is licensed under the terms of the GNU GPL, version 2. - * - * Verifies expected behavior of x86 KVM_CAP_SYNC_REGS functionality, - * including requesting an invalid register set, updates to/from values - * in kvm_run.s.regs when kvm_valid_regs and kvm_dirty_regs are toggled. - */ - -#define _GNU_SOURCE /* for program_invocation_short_name */ -#include -#include -#include -#include -#include - -#include "test_util.h" -#include "kvm_util.h" -#include "x86.h" - -#define VCPU_ID 5 - -void guest_code(void) -{ - for (;;) { - GUEST_SYNC(0); - asm volatile ("inc %r11"); - } -} - -static void compare_regs(struct kvm_regs *left, struct kvm_regs *right) -{ -#define REG_COMPARE(reg) \ - TEST_ASSERT(left->reg == right->reg, \ - "Register " #reg \ - " values did not match: 0x%llx, 0x%llx\n", \ - left->reg, right->reg) - REG_COMPARE(rax); - REG_COMPARE(rbx); - REG_COMPARE(rcx); - REG_COMPARE(rdx); - REG_COMPARE(rsi); - REG_COMPARE(rdi); - REG_COMPARE(rsp); - REG_COMPARE(rbp); - REG_COMPARE(r8); - REG_COMPARE(r9); - REG_COMPARE(r10); - REG_COMPARE(r11); - REG_COMPARE(r12); - REG_COMPARE(r13); - REG_COMPARE(r14); - REG_COMPARE(r15); - REG_COMPARE(rip); - REG_COMPARE(rflags); -#undef REG_COMPARE -} - -static void compare_sregs(struct kvm_sregs *left, struct kvm_sregs *right) -{ -} - -static void compare_vcpu_events(struct kvm_vcpu_events *left, - struct kvm_vcpu_events *right) -{ -} - -#define TEST_SYNC_FIELDS (KVM_SYNC_X86_REGS|KVM_SYNC_X86_SREGS|KVM_SYNC_X86_EVENTS) -#define INVALID_SYNC_FIELD 0x80000000 - -int main(int argc, char *argv[]) -{ - struct kvm_vm *vm; - struct kvm_run *run; - struct kvm_regs regs; - struct kvm_sregs sregs; - struct kvm_vcpu_events events; - int rv, cap; - - /* Tell stdout not to buffer its content */ - setbuf(stdout, NULL); - - cap = kvm_check_cap(KVM_CAP_SYNC_REGS); - if ((cap & TEST_SYNC_FIELDS) != TEST_SYNC_FIELDS) { - fprintf(stderr, "KVM_CAP_SYNC_REGS not supported, skipping test\n"); - exit(KSFT_SKIP); - } - if ((cap & INVALID_SYNC_FIELD) != 0) { - fprintf(stderr, "The \"invalid\" field is not invalid, skipping test\n"); - exit(KSFT_SKIP); - } - - /* Create VM */ - vm = vm_create_default(VCPU_ID, 0, guest_code); - - run = vcpu_state(vm, VCPU_ID); - - /* Request reading invalid register set from VCPU. */ - run->kvm_valid_regs = INVALID_SYNC_FIELD; - rv = _vcpu_run(vm, VCPU_ID); - TEST_ASSERT(rv < 0 && errno == EINVAL, - "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n", - rv); - vcpu_state(vm, VCPU_ID)->kvm_valid_regs = 0; - - run->kvm_valid_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS; - rv = _vcpu_run(vm, VCPU_ID); - TEST_ASSERT(rv < 0 && errno == EINVAL, - "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n", - rv); - vcpu_state(vm, VCPU_ID)->kvm_valid_regs = 0; - - /* Request setting invalid register set into VCPU. */ - run->kvm_dirty_regs = INVALID_SYNC_FIELD; - rv = _vcpu_run(vm, VCPU_ID); - TEST_ASSERT(rv < 0 && errno == EINVAL, - "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n", - rv); - vcpu_state(vm, VCPU_ID)->kvm_dirty_regs = 0; - - run->kvm_dirty_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS; - rv = _vcpu_run(vm, VCPU_ID); - TEST_ASSERT(rv < 0 && errno == EINVAL, - "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n", - rv); - vcpu_state(vm, VCPU_ID)->kvm_dirty_regs = 0; - - /* Request and verify all valid register sets. */ - /* TODO: BUILD TIME CHECK: TEST_ASSERT(KVM_SYNC_X86_NUM_FIELDS != 3); */ - run->kvm_valid_regs = TEST_SYNC_FIELDS; - rv = _vcpu_run(vm, VCPU_ID); - TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, - "Unexpected exit reason: %u (%s),\n", - run->exit_reason, - exit_reason_str(run->exit_reason)); - - vcpu_regs_get(vm, VCPU_ID, ®s); - compare_regs(®s, &run->s.regs.regs); - - vcpu_sregs_get(vm, VCPU_ID, &sregs); - compare_sregs(&sregs, &run->s.regs.sregs); - - vcpu_events_get(vm, VCPU_ID, &events); - compare_vcpu_events(&events, &run->s.regs.events); - - /* Set and verify various register values. */ - run->s.regs.regs.r11 = 0xBAD1DEA; - run->s.regs.sregs.apic_base = 1 << 11; - /* TODO run->s.regs.events.XYZ = ABC; */ - - run->kvm_valid_regs = TEST_SYNC_FIELDS; - run->kvm_dirty_regs = KVM_SYNC_X86_REGS | KVM_SYNC_X86_SREGS; - rv = _vcpu_run(vm, VCPU_ID); - TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, - "Unexpected exit reason: %u (%s),\n", - run->exit_reason, - exit_reason_str(run->exit_reason)); - TEST_ASSERT(run->s.regs.regs.r11 == 0xBAD1DEA + 1, - "r11 sync regs value incorrect 0x%llx.", - run->s.regs.regs.r11); - TEST_ASSERT(run->s.regs.sregs.apic_base == 1 << 11, - "apic_base sync regs value incorrect 0x%llx.", - run->s.regs.sregs.apic_base); - - vcpu_regs_get(vm, VCPU_ID, ®s); - compare_regs(®s, &run->s.regs.regs); - - vcpu_sregs_get(vm, VCPU_ID, &sregs); - compare_sregs(&sregs, &run->s.regs.sregs); - - vcpu_events_get(vm, VCPU_ID, &events); - compare_vcpu_events(&events, &run->s.regs.events); - - /* Clear kvm_dirty_regs bits, verify new s.regs values are - * overwritten with existing guest values. - */ - run->kvm_valid_regs = TEST_SYNC_FIELDS; - run->kvm_dirty_regs = 0; - run->s.regs.regs.r11 = 0xDEADBEEF; - rv = _vcpu_run(vm, VCPU_ID); - TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, - "Unexpected exit reason: %u (%s),\n", - run->exit_reason, - exit_reason_str(run->exit_reason)); - TEST_ASSERT(run->s.regs.regs.r11 != 0xDEADBEEF, - "r11 sync regs value incorrect 0x%llx.", - run->s.regs.regs.r11); - - /* Clear kvm_valid_regs bits and kvm_dirty_bits. - * Verify s.regs values are not overwritten with existing guest values - * and that guest values are not overwritten with kvm_sync_regs values. - */ - run->kvm_valid_regs = 0; - run->kvm_dirty_regs = 0; - run->s.regs.regs.r11 = 0xAAAA; - regs.r11 = 0xBAC0; - vcpu_regs_set(vm, VCPU_ID, ®s); - rv = _vcpu_run(vm, VCPU_ID); - TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, - "Unexpected exit reason: %u (%s),\n", - run->exit_reason, - exit_reason_str(run->exit_reason)); - TEST_ASSERT(run->s.regs.regs.r11 == 0xAAAA, - "r11 sync regs value incorrect 0x%llx.", - run->s.regs.regs.r11); - vcpu_regs_get(vm, VCPU_ID, ®s); - TEST_ASSERT(regs.r11 == 0xBAC0 + 1, - "r11 guest value incorrect 0x%llx.", - regs.r11); - - /* Clear kvm_valid_regs bits. Verify s.regs values are not overwritten - * with existing guest values but that guest values are overwritten - * with kvm_sync_regs values. - */ - run->kvm_valid_regs = 0; - run->kvm_dirty_regs = TEST_SYNC_FIELDS; - run->s.regs.regs.r11 = 0xBBBB; - rv = _vcpu_run(vm, VCPU_ID); - TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, - "Unexpected exit reason: %u (%s),\n", - run->exit_reason, - exit_reason_str(run->exit_reason)); - TEST_ASSERT(run->s.regs.regs.r11 == 0xBBBB, - "r11 sync regs value incorrect 0x%llx.", - run->s.regs.regs.r11); - vcpu_regs_get(vm, VCPU_ID, ®s); - TEST_ASSERT(regs.r11 == 0xBBBB + 1, - "r11 guest value incorrect 0x%llx.", - regs.r11); - - kvm_vm_free(vm); - - return 0; -} diff --git a/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c deleted file mode 100644 index 8d487c78796a..000000000000 --- a/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c +++ /dev/null @@ -1,174 +0,0 @@ -/* - * gtests/tests/vmx_tsc_adjust_test.c - * - * Copyright (C) 2018, Google LLC. - * - * This work is licensed under the terms of the GNU GPL, version 2. - * - * - * IA32_TSC_ADJUST test - * - * According to the SDM, "if an execution of WRMSR to the - * IA32_TIME_STAMP_COUNTER MSR adds (or subtracts) value X from the TSC, - * the logical processor also adds (or subtracts) value X from the - * IA32_TSC_ADJUST MSR. - * - * Note that when L1 doesn't intercept writes to IA32_TSC, a - * WRMSR(IA32_TSC) from L2 sets L1's TSC value, not L2's perceived TSC - * value. - * - * This test verifies that this unusual case is handled correctly. - */ - -#include "test_util.h" -#include "kvm_util.h" -#include "x86.h" -#include "vmx.h" - -#include -#include - -#include "../kselftest.h" - -#ifndef MSR_IA32_TSC_ADJUST -#define MSR_IA32_TSC_ADJUST 0x3b -#endif - -#define PAGE_SIZE 4096 -#define VCPU_ID 5 - -#define TSC_ADJUST_VALUE (1ll << 32) -#define TSC_OFFSET_VALUE -(1ll << 48) - -enum { - PORT_ABORT = 0x1000, - PORT_REPORT, - PORT_DONE, -}; - -enum { - VMXON_PAGE = 0, - VMCS_PAGE, - MSR_BITMAP_PAGE, - - NUM_VMX_PAGES, -}; - -struct kvm_single_msr { - struct kvm_msrs header; - struct kvm_msr_entry entry; -} __attribute__((packed)); - -/* The virtual machine object. */ -static struct kvm_vm *vm; - -static void check_ia32_tsc_adjust(int64_t max) -{ - int64_t adjust; - - adjust = rdmsr(MSR_IA32_TSC_ADJUST); - GUEST_SYNC(adjust); - GUEST_ASSERT(adjust <= max); -} - -static void l2_guest_code(void) -{ - uint64_t l1_tsc = rdtsc() - TSC_OFFSET_VALUE; - - wrmsr(MSR_IA32_TSC, l1_tsc - TSC_ADJUST_VALUE); - check_ia32_tsc_adjust(-2 * TSC_ADJUST_VALUE); - - /* Exit to L1 */ - __asm__ __volatile__("vmcall"); -} - -static void l1_guest_code(struct vmx_pages *vmx_pages) -{ -#define L2_GUEST_STACK_SIZE 64 - unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; - uint32_t control; - uintptr_t save_cr3; - - GUEST_ASSERT(rdtsc() < TSC_ADJUST_VALUE); - wrmsr(MSR_IA32_TSC, rdtsc() - TSC_ADJUST_VALUE); - check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE); - - GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); - - /* Prepare the VMCS for L2 execution. */ - prepare_vmcs(vmx_pages, l2_guest_code, - &l2_guest_stack[L2_GUEST_STACK_SIZE]); - control = vmreadz(CPU_BASED_VM_EXEC_CONTROL); - control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETING; - vmwrite(CPU_BASED_VM_EXEC_CONTROL, control); - vmwrite(TSC_OFFSET, TSC_OFFSET_VALUE); - - /* Jump into L2. First, test failure to load guest CR3. */ - save_cr3 = vmreadz(GUEST_CR3); - vmwrite(GUEST_CR3, -1ull); - GUEST_ASSERT(!vmlaunch()); - GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == - (EXIT_REASON_FAILED_VMENTRY | EXIT_REASON_INVALID_STATE)); - check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE); - vmwrite(GUEST_CR3, save_cr3); - - GUEST_ASSERT(!vmlaunch()); - GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); - - check_ia32_tsc_adjust(-2 * TSC_ADJUST_VALUE); - - GUEST_DONE(); -} - -void report(int64_t val) -{ - printf("IA32_TSC_ADJUST is %ld (%lld * TSC_ADJUST_VALUE + %lld).\n", - val, val / TSC_ADJUST_VALUE, val % TSC_ADJUST_VALUE); -} - -int main(int argc, char *argv[]) -{ - struct vmx_pages *vmx_pages; - vm_vaddr_t vmx_pages_gva; - struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1); - - if (!(entry->ecx & CPUID_VMX)) { - fprintf(stderr, "nested VMX not enabled, skipping test\n"); - exit(KSFT_SKIP); - } - - vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code); - vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); - - /* Allocate VMX pages and shared descriptors (vmx_pages). */ - vmx_pages = vcpu_alloc_vmx(vm, &vmx_pages_gva); - vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva); - - for (;;) { - volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID); - struct ucall uc; - - vcpu_run(vm, VCPU_ID); - TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, - "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n", - run->exit_reason, - exit_reason_str(run->exit_reason)); - - switch (get_ucall(vm, VCPU_ID, &uc)) { - case UCALL_ABORT: - TEST_ASSERT(false, "%s", (const char *)uc.args[0]); - /* NOT REACHED */ - case UCALL_SYNC: - report(uc.args[1]); - break; - case UCALL_DONE: - goto done; - default: - TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd); - } - } - - kvm_vm_free(vm); -done: - return 0; -} diff --git a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c new file mode 100644 index 000000000000..d503a51fad30 --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c @@ -0,0 +1,113 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * CR4 and CPUID sync test + * + * Copyright 2018, Red Hat, Inc. and/or its affiliates. + * + * Author: + * Wei Huang + */ + +#include +#include +#include +#include +#include + +#include "test_util.h" + +#include "kvm_util.h" +#include "processor.h" + +#define X86_FEATURE_XSAVE (1<<26) +#define X86_FEATURE_OSXSAVE (1<<27) +#define VCPU_ID 1 + +static inline bool cr4_cpuid_is_sync(void) +{ + int func, subfunc; + uint32_t eax, ebx, ecx, edx; + uint64_t cr4; + + func = 0x1; + subfunc = 0x0; + __asm__ __volatile__("cpuid" + : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx) + : "a"(func), "c"(subfunc)); + + cr4 = get_cr4(); + + return (!!(ecx & X86_FEATURE_OSXSAVE)) == (!!(cr4 & X86_CR4_OSXSAVE)); +} + +static void guest_code(void) +{ + uint64_t cr4; + + /* turn on CR4.OSXSAVE */ + cr4 = get_cr4(); + cr4 |= X86_CR4_OSXSAVE; + set_cr4(cr4); + + /* verify CR4.OSXSAVE == CPUID.OSXSAVE */ + GUEST_ASSERT(cr4_cpuid_is_sync()); + + /* notify hypervisor to change CR4 */ + GUEST_SYNC(0); + + /* check again */ + GUEST_ASSERT(cr4_cpuid_is_sync()); + + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + struct kvm_run *run; + struct kvm_vm *vm; + struct kvm_sregs sregs; + struct kvm_cpuid_entry2 *entry; + struct ucall uc; + int rc; + + entry = kvm_get_supported_cpuid_entry(1); + if (!(entry->ecx & X86_FEATURE_XSAVE)) { + printf("XSAVE feature not supported, skipping test\n"); + return 0; + } + + /* Tell stdout not to buffer its content */ + setbuf(stdout, NULL); + + /* Create VM */ + vm = vm_create_default(VCPU_ID, 0, guest_code); + vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); + run = vcpu_state(vm, VCPU_ID); + + while (1) { + rc = _vcpu_run(vm, VCPU_ID); + + if (run->exit_reason == KVM_EXIT_IO) { + switch (get_ucall(vm, VCPU_ID, &uc)) { + case UCALL_SYNC: + /* emulate hypervisor clearing CR4.OSXSAVE */ + vcpu_sregs_get(vm, VCPU_ID, &sregs); + sregs.cr4 &= ~X86_CR4_OSXSAVE; + vcpu_sregs_set(vm, VCPU_ID, &sregs); + break; + case UCALL_ABORT: + TEST_ASSERT(false, "Guest CR4 bit (OSXSAVE) unsynchronized with CPUID bit."); + break; + case UCALL_DONE: + goto done; + default: + TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd); + } + } + } + + kvm_vm_free(vm); + +done: + return 0; +} diff --git a/tools/testing/selftests/kvm/x86_64/dirty_log_test.c b/tools/testing/selftests/kvm/x86_64/dirty_log_test.c new file mode 100644 index 000000000000..7cf3e4ae6046 --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/dirty_log_test.c @@ -0,0 +1,307 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * KVM dirty page logging test + * + * Copyright (C) 2018, Red Hat, Inc. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "test_util.h" +#include "kvm_util.h" + +#define DEBUG printf + +#define VCPU_ID 1 +/* The memory slot index to track dirty pages */ +#define TEST_MEM_SLOT_INDEX 1 +/* + * GPA offset of the testing memory slot. Must be bigger than the + * default vm mem slot, which is DEFAULT_GUEST_PHY_PAGES. + */ +#define TEST_MEM_OFFSET (1ULL << 30) /* 1G */ +/* Size of the testing memory slot */ +#define TEST_MEM_PAGES (1ULL << 18) /* 1G for 4K pages */ +/* How many pages to dirty for each guest loop */ +#define TEST_PAGES_PER_LOOP 1024 +/* How many host loops to run (one KVM_GET_DIRTY_LOG for each loop) */ +#define TEST_HOST_LOOP_N 32 +/* Interval for each host loop (ms) */ +#define TEST_HOST_LOOP_INTERVAL 10 + +/* + * Guest variables. We use these variables to share data between host + * and guest. There are two copies of the variables, one in host memory + * (which is unused) and one in guest memory. When the host wants to + * access these variables, it needs to call addr_gva2hva() to access the + * guest copy. + */ +uint64_t guest_random_array[TEST_PAGES_PER_LOOP]; +uint64_t guest_iteration; +uint64_t guest_page_size; + +/* + * Writes to the first byte of a random page within the testing memory + * region continuously. + */ +void guest_code(void) +{ + int i = 0; + uint64_t volatile *array = guest_random_array; + uint64_t volatile *guest_addr; + + while (true) { + for (i = 0; i < TEST_PAGES_PER_LOOP; i++) { + /* + * Write to the first 8 bytes of a random page + * on the testing memory region. + */ + guest_addr = (uint64_t *) + (TEST_MEM_OFFSET + + (array[i] % TEST_MEM_PAGES) * guest_page_size); + *guest_addr = guest_iteration; + } + /* Tell the host that we need more random numbers */ + GUEST_SYNC(1); + } +} + +/* + * Host variables. These variables should only be used by the host + * rather than the guest. + */ +bool host_quit; + +/* Points to the test VM memory region on which we track dirty logs */ +void *host_test_mem; + +/* For statistics only */ +uint64_t host_dirty_count; +uint64_t host_clear_count; +uint64_t host_track_next_count; + +/* + * We use this bitmap to track some pages that should have its dirty + * bit set in the _next_ iteration. For example, if we detected the + * page value changed to current iteration but at the same time the + * page bit is cleared in the latest bitmap, then the system must + * report that write in the next get dirty log call. + */ +unsigned long *host_bmap_track; + +void generate_random_array(uint64_t *guest_array, uint64_t size) +{ + uint64_t i; + + for (i = 0; i < size; i++) { + guest_array[i] = random(); + } +} + +void *vcpu_worker(void *data) +{ + int ret; + uint64_t loops, *guest_array, pages_count = 0; + struct kvm_vm *vm = data; + struct kvm_run *run; + struct ucall uc; + + run = vcpu_state(vm, VCPU_ID); + + /* Retrieve the guest random array pointer and cache it */ + guest_array = addr_gva2hva(vm, (vm_vaddr_t)guest_random_array); + + DEBUG("VCPU starts\n"); + + generate_random_array(guest_array, TEST_PAGES_PER_LOOP); + + while (!READ_ONCE(host_quit)) { + /* Let the guest to dirty these random pages */ + ret = _vcpu_run(vm, VCPU_ID); + if (run->exit_reason == KVM_EXIT_IO && + get_ucall(vm, VCPU_ID, &uc) == UCALL_SYNC) { + pages_count += TEST_PAGES_PER_LOOP; + generate_random_array(guest_array, TEST_PAGES_PER_LOOP); + } else { + TEST_ASSERT(false, + "Invalid guest sync status: " + "exit_reason=%s\n", + exit_reason_str(run->exit_reason)); + } + } + + DEBUG("VCPU exits, dirtied %"PRIu64" pages\n", pages_count); + + return NULL; +} + +void vm_dirty_log_verify(unsigned long *bmap, uint64_t iteration) +{ + uint64_t page; + uint64_t volatile *value_ptr; + + for (page = 0; page < TEST_MEM_PAGES; page++) { + value_ptr = host_test_mem + page * getpagesize(); + + /* If this is a special page that we were tracking... */ + if (test_and_clear_bit(page, host_bmap_track)) { + host_track_next_count++; + TEST_ASSERT(test_bit(page, bmap), + "Page %"PRIu64" should have its dirty bit " + "set in this iteration but it is missing", + page); + } + + if (test_bit(page, bmap)) { + host_dirty_count++; + /* + * If the bit is set, the value written onto + * the corresponding page should be either the + * previous iteration number or the current one. + */ + TEST_ASSERT(*value_ptr == iteration || + *value_ptr == iteration - 1, + "Set page %"PRIu64" value %"PRIu64 + " incorrect (iteration=%"PRIu64")", + page, *value_ptr, iteration); + } else { + host_clear_count++; + /* + * If cleared, the value written can be any + * value smaller or equals to the iteration + * number. Note that the value can be exactly + * (iteration-1) if that write can happen + * like this: + * + * (1) increase loop count to "iteration-1" + * (2) write to page P happens (with value + * "iteration-1") + * (3) get dirty log for "iteration-1"; we'll + * see that page P bit is set (dirtied), + * and not set the bit in host_bmap_track + * (4) increase loop count to "iteration" + * (which is current iteration) + * (5) get dirty log for current iteration, + * we'll see that page P is cleared, with + * value "iteration-1". + */ + TEST_ASSERT(*value_ptr <= iteration, + "Clear page %"PRIu64" value %"PRIu64 + " incorrect (iteration=%"PRIu64")", + page, *value_ptr, iteration); + if (*value_ptr == iteration) { + /* + * This page is _just_ modified; it + * should report its dirtyness in the + * next run + */ + set_bit(page, host_bmap_track); + } + } + } +} + +void help(char *name) +{ + puts(""); + printf("usage: %s [-i iterations] [-I interval] [-h]\n", name); + puts(""); + printf(" -i: specify iteration counts (default: %"PRIu64")\n", + TEST_HOST_LOOP_N); + printf(" -I: specify interval in ms (default: %"PRIu64" ms)\n", + TEST_HOST_LOOP_INTERVAL); + puts(""); + exit(0); +} + +int main(int argc, char *argv[]) +{ + pthread_t vcpu_thread; + struct kvm_vm *vm; + uint64_t volatile *psize, *iteration; + unsigned long *bmap, iterations = TEST_HOST_LOOP_N, + interval = TEST_HOST_LOOP_INTERVAL; + int opt; + + while ((opt = getopt(argc, argv, "hi:I:")) != -1) { + switch (opt) { + case 'i': + iterations = strtol(optarg, NULL, 10); + break; + case 'I': + interval = strtol(optarg, NULL, 10); + break; + case 'h': + default: + help(argv[0]); + break; + } + } + + TEST_ASSERT(iterations > 2, "Iteration must be bigger than zero\n"); + TEST_ASSERT(interval > 0, "Interval must be bigger than zero"); + + DEBUG("Test iterations: %"PRIu64", interval: %"PRIu64" (ms)\n", + iterations, interval); + + srandom(time(0)); + + bmap = bitmap_alloc(TEST_MEM_PAGES); + host_bmap_track = bitmap_alloc(TEST_MEM_PAGES); + + vm = vm_create_default(VCPU_ID, TEST_MEM_PAGES, guest_code); + + /* Add an extra memory slot for testing dirty logging */ + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, + TEST_MEM_OFFSET, + TEST_MEM_SLOT_INDEX, + TEST_MEM_PAGES, + KVM_MEM_LOG_DIRTY_PAGES); + /* Cache the HVA pointer of the region */ + host_test_mem = addr_gpa2hva(vm, (vm_paddr_t)TEST_MEM_OFFSET); + + /* Do 1:1 mapping for the dirty track memory slot */ + virt_map(vm, TEST_MEM_OFFSET, TEST_MEM_OFFSET, + TEST_MEM_PAGES * getpagesize(), 0); + + vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); + + /* Tell the guest about the page size on the system */ + psize = addr_gva2hva(vm, (vm_vaddr_t)&guest_page_size); + *psize = getpagesize(); + + /* Start the iterations */ + iteration = addr_gva2hva(vm, (vm_vaddr_t)&guest_iteration); + *iteration = 1; + + /* Start dirtying pages */ + pthread_create(&vcpu_thread, NULL, vcpu_worker, vm); + + while (*iteration < iterations) { + /* Give the vcpu thread some time to dirty some pages */ + usleep(interval * 1000); + kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap); + vm_dirty_log_verify(bmap, *iteration); + (*iteration)++; + } + + /* Tell the vcpu thread to quit */ + host_quit = true; + pthread_join(vcpu_thread, NULL); + + DEBUG("Total bits checked: dirty (%"PRIu64"), clear (%"PRIu64"), " + "track_next (%"PRIu64")\n", host_dirty_count, host_clear_count, + host_track_next_count); + + free(bmap); + free(host_bmap_track); + kvm_vm_free(vm); + + return 0; +} diff --git a/tools/testing/selftests/kvm/x86_64/platform_info_test.c b/tools/testing/selftests/kvm/x86_64/platform_info_test.c new file mode 100644 index 000000000000..eb3e7a838cb4 --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/platform_info_test.c @@ -0,0 +1,110 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Test for x86 KVM_CAP_MSR_PLATFORM_INFO + * + * Copyright (C) 2018, Google LLC. + * + * This work is licensed under the terms of the GNU GPL, version 2. + * + * Verifies expected behavior of controlling guest access to + * MSR_PLATFORM_INFO. + */ + +#define _GNU_SOURCE /* for program_invocation_short_name */ +#include +#include +#include +#include +#include + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" + +#define VCPU_ID 0 +#define MSR_PLATFORM_INFO_MAX_TURBO_RATIO 0xff00 + +static void guest_code(void) +{ + uint64_t msr_platform_info; + + for (;;) { + msr_platform_info = rdmsr(MSR_PLATFORM_INFO); + GUEST_SYNC(msr_platform_info); + asm volatile ("inc %r11"); + } +} + +static void set_msr_platform_info_enabled(struct kvm_vm *vm, bool enable) +{ + struct kvm_enable_cap cap = {}; + + cap.cap = KVM_CAP_MSR_PLATFORM_INFO; + cap.flags = 0; + cap.args[0] = (int)enable; + vm_enable_cap(vm, &cap); +} + +static void test_msr_platform_info_enabled(struct kvm_vm *vm) +{ + struct kvm_run *run = vcpu_state(vm, VCPU_ID); + struct ucall uc; + + set_msr_platform_info_enabled(vm, true); + vcpu_run(vm, VCPU_ID); + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Exit_reason other than KVM_EXIT_IO: %u (%s),\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + get_ucall(vm, VCPU_ID, &uc); + TEST_ASSERT(uc.cmd == UCALL_SYNC, + "Received ucall other than UCALL_SYNC: %u\n", + ucall); + TEST_ASSERT((uc.args[1] & MSR_PLATFORM_INFO_MAX_TURBO_RATIO) == + MSR_PLATFORM_INFO_MAX_TURBO_RATIO, + "Expected MSR_PLATFORM_INFO to have max turbo ratio mask: %i.", + MSR_PLATFORM_INFO_MAX_TURBO_RATIO); +} + +static void test_msr_platform_info_disabled(struct kvm_vm *vm) +{ + struct kvm_run *run = vcpu_state(vm, VCPU_ID); + + set_msr_platform_info_enabled(vm, false); + vcpu_run(vm, VCPU_ID); + TEST_ASSERT(run->exit_reason == KVM_EXIT_SHUTDOWN, + "Exit_reason other than KVM_EXIT_SHUTDOWN: %u (%s)\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); +} + +int main(int argc, char *argv[]) +{ + struct kvm_vm *vm; + struct kvm_run *state; + int rv; + uint64_t msr_platform_info; + + /* Tell stdout not to buffer its content */ + setbuf(stdout, NULL); + + rv = kvm_check_cap(KVM_CAP_MSR_PLATFORM_INFO); + if (!rv) { + fprintf(stderr, + "KVM_CAP_MSR_PLATFORM_INFO not supported, skip test\n"); + exit(KSFT_SKIP); + } + + vm = vm_create_default(VCPU_ID, 0, guest_code); + + msr_platform_info = vcpu_get_msr(vm, VCPU_ID, MSR_PLATFORM_INFO); + vcpu_set_msr(vm, VCPU_ID, MSR_PLATFORM_INFO, + msr_platform_info | MSR_PLATFORM_INFO_MAX_TURBO_RATIO); + test_msr_platform_info_disabled(vm); + test_msr_platform_info_enabled(vm); + vcpu_set_msr(vm, VCPU_ID, MSR_PLATFORM_INFO, msr_platform_info); + + kvm_vm_free(vm); + + return 0; +} diff --git a/tools/testing/selftests/kvm/x86_64/set_sregs_test.c b/tools/testing/selftests/kvm/x86_64/set_sregs_test.c new file mode 100644 index 000000000000..35640e8e95bc --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/set_sregs_test.c @@ -0,0 +1,54 @@ +/* + * KVM_SET_SREGS tests + * + * Copyright (C) 2018, Google LLC. + * + * This work is licensed under the terms of the GNU GPL, version 2. + * + * This is a regression test for the bug fixed by the following commit: + * d3802286fa0f ("kvm: x86: Disallow illegal IA32_APIC_BASE MSR values") + * + * That bug allowed a user-mode program that called the KVM_SET_SREGS + * ioctl to put a VCPU's local APIC into an invalid state. + * + */ +#define _GNU_SOURCE /* for program_invocation_short_name */ +#include +#include +#include +#include +#include + +#include "test_util.h" + +#include "kvm_util.h" +#include "processor.h" + +#define VCPU_ID 5 + +int main(int argc, char *argv[]) +{ + struct kvm_sregs sregs; + struct kvm_vm *vm; + int rc; + + /* Tell stdout not to buffer its content */ + setbuf(stdout, NULL); + + /* Create VM */ + vm = vm_create_default(VCPU_ID, 0, NULL); + + vcpu_sregs_get(vm, VCPU_ID, &sregs); + sregs.apic_base = 1 << 10; + rc = _vcpu_sregs_set(vm, VCPU_ID, &sregs); + TEST_ASSERT(rc, "Set IA32_APIC_BASE to %llx (invalid)", + sregs.apic_base); + sregs.apic_base = 1 << 11; + rc = _vcpu_sregs_set(vm, VCPU_ID, &sregs); + TEST_ASSERT(!rc, "Couldn't set IA32_APIC_BASE to %llx (valid)", + sregs.apic_base); + + kvm_vm_free(vm); + + return 0; +} diff --git a/tools/testing/selftests/kvm/x86_64/state_test.c b/tools/testing/selftests/kvm/x86_64/state_test.c new file mode 100644 index 000000000000..43df194a7c1e --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/state_test.c @@ -0,0 +1,197 @@ +/* + * KVM_GET/SET_* tests + * + * Copyright (C) 2018, Red Hat, Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2. + * + * Tests for vCPU state save/restore, including nested guest state. + */ +#define _GNU_SOURCE /* for program_invocation_short_name */ +#include +#include +#include +#include +#include + +#include "test_util.h" + +#include "kvm_util.h" +#include "processor.h" +#include "vmx.h" + +#define VCPU_ID 5 + +static bool have_nested_state; + +void l2_guest_code(void) +{ + GUEST_SYNC(5); + + /* Exit to L1 */ + vmcall(); + + /* L1 has now set up a shadow VMCS for us. */ + GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee); + GUEST_SYNC(9); + GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee); + GUEST_ASSERT(!vmwrite(GUEST_RIP, 0xc0fffee)); + GUEST_SYNC(10); + GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0fffee); + GUEST_ASSERT(!vmwrite(GUEST_RIP, 0xc0ffffee)); + GUEST_SYNC(11); + + /* Done, exit to L1 and never come back. */ + vmcall(); +} + +void l1_guest_code(struct vmx_pages *vmx_pages) +{ +#define L2_GUEST_STACK_SIZE 64 + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + + GUEST_ASSERT(vmx_pages->vmcs_gpa); + GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); + GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa); + + GUEST_SYNC(3); + GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa); + + prepare_vmcs(vmx_pages, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + GUEST_SYNC(4); + GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa); + GUEST_ASSERT(!vmlaunch()); + GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + + /* Check that the launched state is preserved. */ + GUEST_ASSERT(vmlaunch()); + + GUEST_ASSERT(!vmresume()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + + GUEST_SYNC(6); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + + GUEST_ASSERT(!vmresume()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + + vmwrite(GUEST_RIP, vmreadz(GUEST_RIP) + 3); + + vmwrite(SECONDARY_VM_EXEC_CONTROL, SECONDARY_EXEC_SHADOW_VMCS); + vmwrite(VMCS_LINK_POINTER, vmx_pages->shadow_vmcs_gpa); + + GUEST_ASSERT(!vmptrld(vmx_pages->shadow_vmcs_gpa)); + GUEST_ASSERT(vmlaunch()); + GUEST_SYNC(7); + GUEST_ASSERT(vmlaunch()); + GUEST_ASSERT(vmresume()); + + vmwrite(GUEST_RIP, 0xc0ffee); + GUEST_SYNC(8); + GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee); + + GUEST_ASSERT(!vmptrld(vmx_pages->vmcs_gpa)); + GUEST_ASSERT(!vmresume()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + + GUEST_ASSERT(!vmptrld(vmx_pages->shadow_vmcs_gpa)); + GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffffee); + GUEST_ASSERT(vmlaunch()); + GUEST_ASSERT(vmresume()); + GUEST_SYNC(12); + GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffffee); + GUEST_ASSERT(vmlaunch()); + GUEST_ASSERT(vmresume()); +} + +void guest_code(struct vmx_pages *vmx_pages) +{ + GUEST_SYNC(1); + GUEST_SYNC(2); + + if (vmx_pages) + l1_guest_code(vmx_pages); + + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + struct vmx_pages *vmx_pages = NULL; + vm_vaddr_t vmx_pages_gva = 0; + + struct kvm_regs regs1, regs2; + struct kvm_vm *vm; + struct kvm_run *run; + struct kvm_x86_state *state; + struct ucall uc; + int stage; + + struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1); + + /* Create VM */ + vm = vm_create_default(VCPU_ID, 0, guest_code); + vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); + run = vcpu_state(vm, VCPU_ID); + + vcpu_regs_get(vm, VCPU_ID, ®s1); + + if (kvm_check_cap(KVM_CAP_NESTED_STATE)) { + vmx_pages = vcpu_alloc_vmx(vm, &vmx_pages_gva); + vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva); + } else { + printf("will skip nested state checks\n"); + vcpu_args_set(vm, VCPU_ID, 1, 0); + } + + for (stage = 1;; stage++) { + _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Unexpected exit reason: %u (%s),\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + + memset(®s1, 0, sizeof(regs1)); + vcpu_regs_get(vm, VCPU_ID, ®s1); + switch (get_ucall(vm, VCPU_ID, &uc)) { + case UCALL_ABORT: + TEST_ASSERT(false, "%s at %s:%d", (const char *)uc.args[0], + __FILE__, uc.args[1]); + /* NOT REACHED */ + case UCALL_SYNC: + break; + case UCALL_DONE: + goto done; + default: + TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd); + } + + /* UCALL_SYNC is handled here. */ + TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") && + uc.args[1] == stage, "Unexpected register values vmexit #%lx, got %lx", + stage, (ulong)uc.args[1]); + + state = vcpu_save_state(vm, VCPU_ID); + kvm_vm_release(vm); + + /* Restore state in a new VM. */ + kvm_vm_restart(vm, O_RDWR); + vm_vcpu_add(vm, VCPU_ID, 0, 0); + vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); + vcpu_load_state(vm, VCPU_ID, state); + run = vcpu_state(vm, VCPU_ID); + free(state); + + memset(®s2, 0, sizeof(regs2)); + vcpu_regs_get(vm, VCPU_ID, ®s2); + TEST_ASSERT(!memcmp(®s1, ®s2, sizeof(regs2)), + "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx", + (ulong) regs2.rdi, (ulong) regs2.rsi); + } + +done: + kvm_vm_free(vm); +} diff --git a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c new file mode 100644 index 000000000000..c8478ce9ea77 --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c @@ -0,0 +1,237 @@ +/* + * Test for x86 KVM_CAP_SYNC_REGS + * + * Copyright (C) 2018, Google LLC. + * + * This work is licensed under the terms of the GNU GPL, version 2. + * + * Verifies expected behavior of x86 KVM_CAP_SYNC_REGS functionality, + * including requesting an invalid register set, updates to/from values + * in kvm_run.s.regs when kvm_valid_regs and kvm_dirty_regs are toggled. + */ + +#define _GNU_SOURCE /* for program_invocation_short_name */ +#include +#include +#include +#include +#include + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" + +#define VCPU_ID 5 + +void guest_code(void) +{ + for (;;) { + GUEST_SYNC(0); + asm volatile ("inc %r11"); + } +} + +static void compare_regs(struct kvm_regs *left, struct kvm_regs *right) +{ +#define REG_COMPARE(reg) \ + TEST_ASSERT(left->reg == right->reg, \ + "Register " #reg \ + " values did not match: 0x%llx, 0x%llx\n", \ + left->reg, right->reg) + REG_COMPARE(rax); + REG_COMPARE(rbx); + REG_COMPARE(rcx); + REG_COMPARE(rdx); + REG_COMPARE(rsi); + REG_COMPARE(rdi); + REG_COMPARE(rsp); + REG_COMPARE(rbp); + REG_COMPARE(r8); + REG_COMPARE(r9); + REG_COMPARE(r10); + REG_COMPARE(r11); + REG_COMPARE(r12); + REG_COMPARE(r13); + REG_COMPARE(r14); + REG_COMPARE(r15); + REG_COMPARE(rip); + REG_COMPARE(rflags); +#undef REG_COMPARE +} + +static void compare_sregs(struct kvm_sregs *left, struct kvm_sregs *right) +{ +} + +static void compare_vcpu_events(struct kvm_vcpu_events *left, + struct kvm_vcpu_events *right) +{ +} + +#define TEST_SYNC_FIELDS (KVM_SYNC_X86_REGS|KVM_SYNC_X86_SREGS|KVM_SYNC_X86_EVENTS) +#define INVALID_SYNC_FIELD 0x80000000 + +int main(int argc, char *argv[]) +{ + struct kvm_vm *vm; + struct kvm_run *run; + struct kvm_regs regs; + struct kvm_sregs sregs; + struct kvm_vcpu_events events; + int rv, cap; + + /* Tell stdout not to buffer its content */ + setbuf(stdout, NULL); + + cap = kvm_check_cap(KVM_CAP_SYNC_REGS); + if ((cap & TEST_SYNC_FIELDS) != TEST_SYNC_FIELDS) { + fprintf(stderr, "KVM_CAP_SYNC_REGS not supported, skipping test\n"); + exit(KSFT_SKIP); + } + if ((cap & INVALID_SYNC_FIELD) != 0) { + fprintf(stderr, "The \"invalid\" field is not invalid, skipping test\n"); + exit(KSFT_SKIP); + } + + /* Create VM */ + vm = vm_create_default(VCPU_ID, 0, guest_code); + + run = vcpu_state(vm, VCPU_ID); + + /* Request reading invalid register set from VCPU. */ + run->kvm_valid_regs = INVALID_SYNC_FIELD; + rv = _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(rv < 0 && errno == EINVAL, + "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n", + rv); + vcpu_state(vm, VCPU_ID)->kvm_valid_regs = 0; + + run->kvm_valid_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS; + rv = _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(rv < 0 && errno == EINVAL, + "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n", + rv); + vcpu_state(vm, VCPU_ID)->kvm_valid_regs = 0; + + /* Request setting invalid register set into VCPU. */ + run->kvm_dirty_regs = INVALID_SYNC_FIELD; + rv = _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(rv < 0 && errno == EINVAL, + "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n", + rv); + vcpu_state(vm, VCPU_ID)->kvm_dirty_regs = 0; + + run->kvm_dirty_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS; + rv = _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(rv < 0 && errno == EINVAL, + "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n", + rv); + vcpu_state(vm, VCPU_ID)->kvm_dirty_regs = 0; + + /* Request and verify all valid register sets. */ + /* TODO: BUILD TIME CHECK: TEST_ASSERT(KVM_SYNC_X86_NUM_FIELDS != 3); */ + run->kvm_valid_regs = TEST_SYNC_FIELDS; + rv = _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Unexpected exit reason: %u (%s),\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + + vcpu_regs_get(vm, VCPU_ID, ®s); + compare_regs(®s, &run->s.regs.regs); + + vcpu_sregs_get(vm, VCPU_ID, &sregs); + compare_sregs(&sregs, &run->s.regs.sregs); + + vcpu_events_get(vm, VCPU_ID, &events); + compare_vcpu_events(&events, &run->s.regs.events); + + /* Set and verify various register values. */ + run->s.regs.regs.r11 = 0xBAD1DEA; + run->s.regs.sregs.apic_base = 1 << 11; + /* TODO run->s.regs.events.XYZ = ABC; */ + + run->kvm_valid_regs = TEST_SYNC_FIELDS; + run->kvm_dirty_regs = KVM_SYNC_X86_REGS | KVM_SYNC_X86_SREGS; + rv = _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Unexpected exit reason: %u (%s),\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + TEST_ASSERT(run->s.regs.regs.r11 == 0xBAD1DEA + 1, + "r11 sync regs value incorrect 0x%llx.", + run->s.regs.regs.r11); + TEST_ASSERT(run->s.regs.sregs.apic_base == 1 << 11, + "apic_base sync regs value incorrect 0x%llx.", + run->s.regs.sregs.apic_base); + + vcpu_regs_get(vm, VCPU_ID, ®s); + compare_regs(®s, &run->s.regs.regs); + + vcpu_sregs_get(vm, VCPU_ID, &sregs); + compare_sregs(&sregs, &run->s.regs.sregs); + + vcpu_events_get(vm, VCPU_ID, &events); + compare_vcpu_events(&events, &run->s.regs.events); + + /* Clear kvm_dirty_regs bits, verify new s.regs values are + * overwritten with existing guest values. + */ + run->kvm_valid_regs = TEST_SYNC_FIELDS; + run->kvm_dirty_regs = 0; + run->s.regs.regs.r11 = 0xDEADBEEF; + rv = _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Unexpected exit reason: %u (%s),\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + TEST_ASSERT(run->s.regs.regs.r11 != 0xDEADBEEF, + "r11 sync regs value incorrect 0x%llx.", + run->s.regs.regs.r11); + + /* Clear kvm_valid_regs bits and kvm_dirty_bits. + * Verify s.regs values are not overwritten with existing guest values + * and that guest values are not overwritten with kvm_sync_regs values. + */ + run->kvm_valid_regs = 0; + run->kvm_dirty_regs = 0; + run->s.regs.regs.r11 = 0xAAAA; + regs.r11 = 0xBAC0; + vcpu_regs_set(vm, VCPU_ID, ®s); + rv = _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Unexpected exit reason: %u (%s),\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + TEST_ASSERT(run->s.regs.regs.r11 == 0xAAAA, + "r11 sync regs value incorrect 0x%llx.", + run->s.regs.regs.r11); + vcpu_regs_get(vm, VCPU_ID, ®s); + TEST_ASSERT(regs.r11 == 0xBAC0 + 1, + "r11 guest value incorrect 0x%llx.", + regs.r11); + + /* Clear kvm_valid_regs bits. Verify s.regs values are not overwritten + * with existing guest values but that guest values are overwritten + * with kvm_sync_regs values. + */ + run->kvm_valid_regs = 0; + run->kvm_dirty_regs = TEST_SYNC_FIELDS; + run->s.regs.regs.r11 = 0xBBBB; + rv = _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Unexpected exit reason: %u (%s),\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + TEST_ASSERT(run->s.regs.regs.r11 == 0xBBBB, + "r11 sync regs value incorrect 0x%llx.", + run->s.regs.regs.r11); + vcpu_regs_get(vm, VCPU_ID, ®s); + TEST_ASSERT(regs.r11 == 0xBBBB + 1, + "r11 guest value incorrect 0x%llx.", + regs.r11); + + kvm_vm_free(vm); + + return 0; +} diff --git a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c new file mode 100644 index 000000000000..38a91a5f04ac --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c @@ -0,0 +1,174 @@ +/* + * vmx_tsc_adjust_test + * + * Copyright (C) 2018, Google LLC. + * + * This work is licensed under the terms of the GNU GPL, version 2. + * + * + * IA32_TSC_ADJUST test + * + * According to the SDM, "if an execution of WRMSR to the + * IA32_TIME_STAMP_COUNTER MSR adds (or subtracts) value X from the TSC, + * the logical processor also adds (or subtracts) value X from the + * IA32_TSC_ADJUST MSR. + * + * Note that when L1 doesn't intercept writes to IA32_TSC, a + * WRMSR(IA32_TSC) from L2 sets L1's TSC value, not L2's perceived TSC + * value. + * + * This test verifies that this unusual case is handled correctly. + */ + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "vmx.h" + +#include +#include + +#include "kselftest.h" + +#ifndef MSR_IA32_TSC_ADJUST +#define MSR_IA32_TSC_ADJUST 0x3b +#endif + +#define PAGE_SIZE 4096 +#define VCPU_ID 5 + +#define TSC_ADJUST_VALUE (1ll << 32) +#define TSC_OFFSET_VALUE -(1ll << 48) + +enum { + PORT_ABORT = 0x1000, + PORT_REPORT, + PORT_DONE, +}; + +enum { + VMXON_PAGE = 0, + VMCS_PAGE, + MSR_BITMAP_PAGE, + + NUM_VMX_PAGES, +}; + +struct kvm_single_msr { + struct kvm_msrs header; + struct kvm_msr_entry entry; +} __attribute__((packed)); + +/* The virtual machine object. */ +static struct kvm_vm *vm; + +static void check_ia32_tsc_adjust(int64_t max) +{ + int64_t adjust; + + adjust = rdmsr(MSR_IA32_TSC_ADJUST); + GUEST_SYNC(adjust); + GUEST_ASSERT(adjust <= max); +} + +static void l2_guest_code(void) +{ + uint64_t l1_tsc = rdtsc() - TSC_OFFSET_VALUE; + + wrmsr(MSR_IA32_TSC, l1_tsc - TSC_ADJUST_VALUE); + check_ia32_tsc_adjust(-2 * TSC_ADJUST_VALUE); + + /* Exit to L1 */ + __asm__ __volatile__("vmcall"); +} + +static void l1_guest_code(struct vmx_pages *vmx_pages) +{ +#define L2_GUEST_STACK_SIZE 64 + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + uint32_t control; + uintptr_t save_cr3; + + GUEST_ASSERT(rdtsc() < TSC_ADJUST_VALUE); + wrmsr(MSR_IA32_TSC, rdtsc() - TSC_ADJUST_VALUE); + check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE); + + GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); + + /* Prepare the VMCS for L2 execution. */ + prepare_vmcs(vmx_pages, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + control = vmreadz(CPU_BASED_VM_EXEC_CONTROL); + control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETING; + vmwrite(CPU_BASED_VM_EXEC_CONTROL, control); + vmwrite(TSC_OFFSET, TSC_OFFSET_VALUE); + + /* Jump into L2. First, test failure to load guest CR3. */ + save_cr3 = vmreadz(GUEST_CR3); + vmwrite(GUEST_CR3, -1ull); + GUEST_ASSERT(!vmlaunch()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == + (EXIT_REASON_FAILED_VMENTRY | EXIT_REASON_INVALID_STATE)); + check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE); + vmwrite(GUEST_CR3, save_cr3); + + GUEST_ASSERT(!vmlaunch()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + + check_ia32_tsc_adjust(-2 * TSC_ADJUST_VALUE); + + GUEST_DONE(); +} + +void report(int64_t val) +{ + printf("IA32_TSC_ADJUST is %ld (%lld * TSC_ADJUST_VALUE + %lld).\n", + val, val / TSC_ADJUST_VALUE, val % TSC_ADJUST_VALUE); +} + +int main(int argc, char *argv[]) +{ + struct vmx_pages *vmx_pages; + vm_vaddr_t vmx_pages_gva; + struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1); + + if (!(entry->ecx & CPUID_VMX)) { + fprintf(stderr, "nested VMX not enabled, skipping test\n"); + exit(KSFT_SKIP); + } + + vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code); + vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); + + /* Allocate VMX pages and shared descriptors (vmx_pages). */ + vmx_pages = vcpu_alloc_vmx(vm, &vmx_pages_gva); + vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva); + + for (;;) { + volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID); + struct ucall uc; + + vcpu_run(vm, VCPU_ID); + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + + switch (get_ucall(vm, VCPU_ID, &uc)) { + case UCALL_ABORT: + TEST_ASSERT(false, "%s", (const char *)uc.args[0]); + /* NOT REACHED */ + case UCALL_SYNC: + report(uc.args[1]); + break; + case UCALL_DONE: + goto done; + default: + TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd); + } + } + + kvm_vm_free(vm); +done: + return 0; +} -- cgit v1.2.3 From eea192bfd96db157151d27dd9c5e3e6a50c5d6be Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 18 Sep 2018 19:54:27 +0200 Subject: kvm: selftests: add cscope make target Signed-off-by: Andrew Jones Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/Makefile | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 734a60da9776..959be146f789 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -29,7 +29,7 @@ include ../lib.mk STATIC_LIBS := $(OUTPUT)/libkvm.a LIBKVM_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM)) -EXTRA_CLEAN += $(LIBKVM_OBJ) $(STATIC_LIBS) +EXTRA_CLEAN += $(LIBKVM_OBJ) $(STATIC_LIBS) cscope.* x := $(shell mkdir -p $(sort $(dir $(LIBKVM_OBJ)))) $(LIBKVM_OBJ): $(OUTPUT)/%.o: %.c @@ -41,3 +41,12 @@ $(OUTPUT)/libkvm.a: $(LIBKVM_OBJ) all: $(STATIC_LIBS) $(TEST_GEN_PROGS): $(STATIC_LIBS) $(STATIC_LIBS):| khdr + +cscope: include_paths = $(LINUX_TOOL_INCLUDE) $(LINUX_HDR_PATH) include lib .. +cscope: + $(RM) cscope.* + (find $(include_paths) -name '*.h' \ + -exec realpath --relative-base=$(PWD) {} \;; \ + find . -name '*.c' \ + -exec realpath --relative-base=$(PWD) {} \;) | sort -u > cscope.files + cscope -b -- cgit v1.2.3 From eabe7881d264fd8ee074636fe50c8a8af08fedfd Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 18 Sep 2018 19:54:28 +0200 Subject: kvm: selftests: tidy up kvm_util Tidy up kvm-util code: code/comment formatting, remove unused code, and move x86 specific code out. We also move vcpu_dump() out of common code, because not all arches (AArch64) have KVM_GET_REGS. Signed-off-by: Andrew Jones Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/include/kvm_util.h | 80 ++-- .../selftests/kvm/include/x86_64/processor.h | 20 +- tools/testing/selftests/kvm/lib/kvm_util.c | 438 +++++---------------- .../testing/selftests/kvm/lib/kvm_util_internal.h | 22 +- tools/testing/selftests/kvm/lib/x86_64/processor.c | 245 ++++++++++++ .../testing/selftests/kvm/x86_64/dirty_log_test.c | 1 + 6 files changed, 407 insertions(+), 399 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 7830c46f27d8..4d5e405ff246 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -17,12 +17,6 @@ #include "sparsebit.h" -/* - * Memslots can't cover the gfn starting at this gpa otherwise vCPUs can't be - * created. Only applies to VMs using EPT. - */ -#define KVM_DEFAULT_IDENTITY_MAP_ADDRESS 0xfffbc000ul - /* Callers of kvm_util only have an incomplete/opaque description of the * structure kvm_util is using to maintain the state of a VM. @@ -33,11 +27,11 @@ typedef uint64_t vm_paddr_t; /* Virtual Machine (Guest) physical address */ typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */ /* Minimum allocated guest virtual and physical addresses */ -#define KVM_UTIL_MIN_VADDR 0x2000 +#define KVM_UTIL_MIN_VADDR 0x2000 #define DEFAULT_GUEST_PHY_PAGES 512 #define DEFAULT_GUEST_STACK_VADDR_MIN 0xab6000 -#define DEFAULT_STACK_PGS 5 +#define DEFAULT_STACK_PGS 5 enum vm_guest_mode { VM_MODE_FLAT48PG, @@ -58,15 +52,15 @@ void kvm_vm_restart(struct kvm_vm *vmp, int perm); void kvm_vm_release(struct kvm_vm *vmp); void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log); -int kvm_memcmp_hva_gva(void *hva, - struct kvm_vm *vm, const vm_vaddr_t gva, size_t len); +int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, const vm_vaddr_t gva, + size_t len); void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename, - uint32_t data_memslot, uint32_t pgd_memslot); + uint32_t data_memslot, uint32_t pgd_memslot); void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent); -void vcpu_dump(FILE *stream, struct kvm_vm *vm, - uint32_t vcpuid, uint8_t indent); +void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, + uint8_t indent); void vm_create_irqchip(struct kvm_vm *vm); @@ -75,13 +69,14 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, uint64_t guest_paddr, uint32_t slot, uint64_t npages, uint32_t flags); -void vcpu_ioctl(struct kvm_vm *vm, - uint32_t vcpuid, unsigned long ioctl, void *arg); +void vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl, + void *arg); void vm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg); void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags); -void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid, int pgd_memslot, int gdt_memslot); +void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid, int pgd_memslot, + int gdt_memslot); vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min, - uint32_t data_memslot, uint32_t pgd_memslot); + uint32_t data_memslot, uint32_t pgd_memslot); void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, size_t size, uint32_t pgd_memslot); void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa); @@ -93,56 +88,33 @@ struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid); void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid); int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid); void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid, - struct kvm_mp_state *mp_state); -void vcpu_regs_get(struct kvm_vm *vm, - uint32_t vcpuid, struct kvm_regs *regs); -void vcpu_regs_set(struct kvm_vm *vm, - uint32_t vcpuid, struct kvm_regs *regs); + struct kvm_mp_state *mp_state); +void vcpu_regs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs); +void vcpu_regs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs); void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...); -void vcpu_sregs_get(struct kvm_vm *vm, - uint32_t vcpuid, struct kvm_sregs *sregs); -void vcpu_sregs_set(struct kvm_vm *vm, - uint32_t vcpuid, struct kvm_sregs *sregs); -int _vcpu_sregs_set(struct kvm_vm *vm, - uint32_t vcpuid, struct kvm_sregs *sregs); +void vcpu_sregs_get(struct kvm_vm *vm, uint32_t vcpuid, + struct kvm_sregs *sregs); +void vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, + struct kvm_sregs *sregs); +int _vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, + struct kvm_sregs *sregs); void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid, - struct kvm_vcpu_events *events); + struct kvm_vcpu_events *events); void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid, - struct kvm_vcpu_events *events); -uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index); -void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index, - uint64_t msr_value); + struct kvm_vcpu_events *events); const char *exit_reason_str(unsigned int exit_reason); void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot); void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, - uint32_t pgd_memslot); -vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, - vm_paddr_t paddr_min, uint32_t memslot); - -struct kvm_cpuid2 *kvm_get_supported_cpuid(void); -void vcpu_set_cpuid( - struct kvm_vm *vm, uint32_t vcpuid, struct kvm_cpuid2 *cpuid); - -struct kvm_cpuid_entry2 * -kvm_get_supported_cpuid_index(uint32_t function, uint32_t index); - -static inline struct kvm_cpuid_entry2 * -kvm_get_supported_cpuid_entry(uint32_t function) -{ - return kvm_get_supported_cpuid_index(function, 0); -} + uint32_t pgd_memslot); +vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min, + uint32_t memslot); struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_size, void *guest_code); void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code); -typedef void (*vmx_guest_code_t)(vm_vaddr_t vmxon_vaddr, - vm_paddr_t vmxon_paddr, - vm_vaddr_t vmcs_vaddr, - vm_paddr_t vmcs_paddr); - struct kvm_userspace_memory_region * kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end); diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index bb76e8db9b2b..e2884c2b81ff 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -305,7 +305,25 @@ static inline unsigned long get_xmm(int n) struct kvm_x86_state; struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid); -void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *state); +void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, + struct kvm_x86_state *state); + +struct kvm_cpuid2 *kvm_get_supported_cpuid(void); +void vcpu_set_cpuid(struct kvm_vm *vm, uint32_t vcpuid, + struct kvm_cpuid2 *cpuid); + +struct kvm_cpuid_entry2 * +kvm_get_supported_cpuid_index(uint32_t function, uint32_t index); + +static inline struct kvm_cpuid_entry2 * +kvm_get_supported_cpuid_entry(uint32_t function) +{ + return kvm_get_supported_cpuid_index(function, 0); +} + +uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index); +void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index, + uint64_t msr_value); /* * Basic CPU control in CR0 diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 4bd31bf04f53..1a8f8f9e3635 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -16,8 +16,6 @@ #include #include -#define KVM_DEV_PATH "/dev/kvm" - #define KVM_UTIL_PGS_PER_HUGEPG 512 #define KVM_UTIL_MIN_PADDR 0x2000 @@ -30,7 +28,8 @@ static void *align(void *x, size_t size) return (void *) (((size_t) x + mask) & ~mask); } -/* Capability +/* + * Capability * * Input Args: * cap - Capability @@ -92,13 +91,13 @@ static void vm_open(struct kvm_vm *vm, int perm) if (vm->kvm_fd < 0) exit(KSFT_SKIP); - /* Create VM. */ vm->fd = ioctl(vm->kvm_fd, KVM_CREATE_VM, NULL); TEST_ASSERT(vm->fd >= 0, "KVM_CREATE_VM ioctl failed, " "rc: %i errno: %i", vm->fd, errno); } -/* VM Create +/* + * VM Create * * Input Args: * mode - VM Mode (e.g. VM_MODE_FLAT48PG) @@ -121,7 +120,6 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) struct kvm_vm *vm; int kvm_fd; - /* Allocate memory. */ vm = calloc(1, sizeof(*vm)); TEST_ASSERT(vm != NULL, "Insufficent Memory"); @@ -160,7 +158,8 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) return vm; } -/* VM Restart +/* + * VM Restart * * Input Args: * vm - VM that has been released before @@ -187,7 +186,8 @@ void kvm_vm_restart(struct kvm_vm *vmp, int perm) " rc: %i errno: %i\n" " slot: %u flags: 0x%x\n" " guest_phys_addr: 0x%lx size: 0x%lx", - ret, errno, region->region.slot, region->region.flags, + ret, errno, region->region.slot, + region->region.flags, region->region.guest_phys_addr, region->region.memory_size); } @@ -203,7 +203,8 @@ void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log) strerror(-ret)); } -/* Userspace Memory Region Find +/* + * Userspace Memory Region Find * * Input Args: * vm - Virtual Machine @@ -221,8 +222,8 @@ void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log) * of the regions is returned. Null is returned only when no overlapping * region exists. */ -static struct userspace_mem_region *userspace_mem_region_find( - struct kvm_vm *vm, uint64_t start, uint64_t end) +static struct userspace_mem_region * +userspace_mem_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end) { struct userspace_mem_region *region; @@ -238,7 +239,8 @@ static struct userspace_mem_region *userspace_mem_region_find( return NULL; } -/* KVM Userspace Memory Region Find +/* + * KVM Userspace Memory Region Find * * Input Args: * vm - Virtual Machine @@ -266,7 +268,8 @@ kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start, return ®ion->region; } -/* VCPU Find +/* + * VCPU Find * * Input Args: * vm - Virtual Machine @@ -281,8 +284,7 @@ kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start, * returns a pointer to it. Returns NULL if the VM doesn't contain a VCPU * for the specified vcpuid. */ -struct vcpu *vcpu_find(struct kvm_vm *vm, - uint32_t vcpuid) +struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid) { struct vcpu *vcpup; @@ -294,7 +296,8 @@ struct vcpu *vcpu_find(struct kvm_vm *vm, return NULL; } -/* VM VCPU Remove +/* + * VM VCPU Remove * * Input Args: * vm - Virtual Machine @@ -331,11 +334,9 @@ void kvm_vm_release(struct kvm_vm *vmp) { int ret; - /* Free VCPUs. */ while (vmp->vcpu_head) vm_vcpu_rm(vmp, vmp->vcpu_head->id); - /* Close file descriptor for the VM. */ ret = close(vmp->fd); TEST_ASSERT(ret == 0, "Close of vm fd failed,\n" " vmp->fd: %i rc: %i errno: %i", vmp->fd, ret, errno); @@ -345,7 +346,8 @@ void kvm_vm_release(struct kvm_vm *vmp) " vmp->kvm_fd: %i rc: %i errno: %i", vmp->kvm_fd, ret, errno); } -/* Destroys and frees the VM pointed to by vmp. +/* + * Destroys and frees the VM pointed to by vmp. */ void kvm_vm_free(struct kvm_vm *vmp) { @@ -384,7 +386,8 @@ void kvm_vm_free(struct kvm_vm *vmp) free(vmp); } -/* Memory Compare, host virtual to guest virtual +/* + * Memory Compare, host virtual to guest virtual * * Input Args: * hva - Starting host virtual address @@ -406,23 +409,25 @@ void kvm_vm_free(struct kvm_vm *vmp) * a length of len, to the guest bytes starting at the guest virtual * address given by gva. */ -int kvm_memcmp_hva_gva(void *hva, - struct kvm_vm *vm, vm_vaddr_t gva, size_t len) +int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, vm_vaddr_t gva, size_t len) { size_t amt; - /* Compare a batch of bytes until either a match is found + /* + * Compare a batch of bytes until either a match is found * or all the bytes have been compared. */ for (uintptr_t offset = 0; offset < len; offset += amt) { uintptr_t ptr1 = (uintptr_t)hva + offset; - /* Determine host address for guest virtual address + /* + * Determine host address for guest virtual address * at offset. */ uintptr_t ptr2 = (uintptr_t)addr_gva2hva(vm, gva + offset); - /* Determine amount to compare on this pass. + /* + * Determine amount to compare on this pass. * Don't allow the comparsion to cross a page boundary. */ amt = len - offset; @@ -434,7 +439,8 @@ int kvm_memcmp_hva_gva(void *hva, assert((ptr1 >> vm->page_shift) == ((ptr1 + amt - 1) >> vm->page_shift)); assert((ptr2 >> vm->page_shift) == ((ptr2 + amt - 1) >> vm->page_shift)); - /* Perform the comparison. If there is a difference + /* + * Perform the comparison. If there is a difference * return that result to the caller, otherwise need * to continue on looking for a mismatch. */ @@ -443,109 +449,15 @@ int kvm_memcmp_hva_gva(void *hva, return ret; } - /* No mismatch found. Let the caller know the two memory + /* + * No mismatch found. Let the caller know the two memory * areas are equal. */ return 0; } -/* Allocate an instance of struct kvm_cpuid2 - * - * Input Args: None - * - * Output Args: None - * - * Return: A pointer to the allocated struct. The caller is responsible - * for freeing this struct. - * - * Since kvm_cpuid2 uses a 0-length array to allow a the size of the - * array to be decided at allocation time, allocation is slightly - * complicated. This function uses a reasonable default length for - * the array and performs the appropriate allocation. - */ -static struct kvm_cpuid2 *allocate_kvm_cpuid2(void) -{ - struct kvm_cpuid2 *cpuid; - int nent = 100; - size_t size; - - size = sizeof(*cpuid); - size += nent * sizeof(struct kvm_cpuid_entry2); - cpuid = malloc(size); - if (!cpuid) { - perror("malloc"); - abort(); - } - - cpuid->nent = nent; - - return cpuid; -} - -/* KVM Supported CPUID Get - * - * Input Args: None - * - * Output Args: - * - * Return: The supported KVM CPUID - * - * Get the guest CPUID supported by KVM. - */ -struct kvm_cpuid2 *kvm_get_supported_cpuid(void) -{ - static struct kvm_cpuid2 *cpuid; - int ret; - int kvm_fd; - - if (cpuid) - return cpuid; - - cpuid = allocate_kvm_cpuid2(); - kvm_fd = open(KVM_DEV_PATH, O_RDONLY); - if (kvm_fd < 0) - exit(KSFT_SKIP); - - ret = ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, cpuid); - TEST_ASSERT(ret == 0, "KVM_GET_SUPPORTED_CPUID failed %d %d\n", - ret, errno); - - close(kvm_fd); - return cpuid; -} - -/* Locate a cpuid entry. - * - * Input Args: - * cpuid: The cpuid. - * function: The function of the cpuid entry to find. - * - * Output Args: None - * - * Return: A pointer to the cpuid entry. Never returns NULL. - */ -struct kvm_cpuid_entry2 * -kvm_get_supported_cpuid_index(uint32_t function, uint32_t index) -{ - struct kvm_cpuid2 *cpuid; - struct kvm_cpuid_entry2 *entry = NULL; - int i; - - cpuid = kvm_get_supported_cpuid(); - for (i = 0; i < cpuid->nent; i++) { - if (cpuid->entries[i].function == function && - cpuid->entries[i].index == index) { - entry = &cpuid->entries[i]; - break; - } - } - - TEST_ASSERT(entry, "Guest CPUID entry not found: (EAX=%x, ECX=%x).", - function, index); - return entry; -} - -/* VM Userspace Memory Region Add +/* + * VM Userspace Memory Region Add * * Input Args: * vm - Virtual Machine @@ -587,7 +499,8 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, " vm->max_gfn: 0x%lx vm->page_size: 0x%x", guest_paddr, npages, vm->max_gfn, vm->page_size); - /* Confirm a mem region with an overlapping address doesn't + /* + * Confirm a mem region with an overlapping address doesn't * already exist. */ region = (struct userspace_mem_region *) userspace_mem_region_find( @@ -678,7 +591,8 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, vm->userspace_mem_region_head = region; } -/* Memslot to region +/* + * Memslot to region * * Input Args: * vm - Virtual Machine @@ -692,8 +606,8 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, * on error (e.g. currently no memory region using memslot as a KVM * memory slot ID). */ -static struct userspace_mem_region *memslot2region(struct kvm_vm *vm, - uint32_t memslot) +static struct userspace_mem_region * +memslot2region(struct kvm_vm *vm, uint32_t memslot) { struct userspace_mem_region *region; @@ -713,7 +627,8 @@ static struct userspace_mem_region *memslot2region(struct kvm_vm *vm, return region; } -/* VM Memory Region Flags Set +/* + * VM Memory Region Flags Set * * Input Args: * vm - Virtual Machine @@ -731,7 +646,6 @@ void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags) int ret; struct userspace_mem_region *region; - /* Locate memory region. */ region = memslot2region(vm, slot); region->region.flags = flags; @@ -743,7 +657,8 @@ void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags) ret, errno, slot, flags); } -/* VCPU mmap Size +/* + * VCPU mmap Size * * Input Args: None * @@ -773,7 +688,8 @@ static int vcpu_mmap_sz(void) return ret; } -/* VM VCPU Add +/* + * VM VCPU Add * * Input Args: * vm - Virtual Machine @@ -786,7 +702,8 @@ static int vcpu_mmap_sz(void) * Creates and adds to the VM specified by vm and virtual CPU with * the ID given by vcpuid. */ -void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid, int pgd_memslot, int gdt_memslot) +void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid, int pgd_memslot, + int gdt_memslot) { struct vcpu *vcpu; @@ -824,7 +741,8 @@ void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid, int pgd_memslot, int gdt_me vcpu_setup(vm, vcpuid, pgd_memslot, gdt_memslot); } -/* VM Virtual Address Unused Gap +/* + * VM Virtual Address Unused Gap * * Input Args: * vm - Virtual Machine @@ -844,14 +762,14 @@ void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid, int pgd_memslot, int gdt_me * sz unallocated bytes >= vaddr_min is available. */ static vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz, - vm_vaddr_t vaddr_min) + vm_vaddr_t vaddr_min) { uint64_t pages = (sz + vm->page_size - 1) >> vm->page_shift; /* Determine lowest permitted virtual page index. */ uint64_t pgidx_start = (vaddr_min + vm->page_size - 1) >> vm->page_shift; if ((pgidx_start * vm->page_size) < vaddr_min) - goto no_va_found; + goto no_va_found; /* Loop over section with enough valid virtual page indexes. */ if (!sparsebit_is_set_num(vm->vpages_valid, @@ -910,7 +828,8 @@ va_found: return pgidx_start * vm->page_size; } -/* VM Virtual Address Allocate +/* + * VM Virtual Address Allocate * * Input Args: * vm - Virtual Machine @@ -931,13 +850,14 @@ va_found: * a page. */ vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min, - uint32_t data_memslot, uint32_t pgd_memslot) + uint32_t data_memslot, uint32_t pgd_memslot) { uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0); virt_pgd_alloc(vm, pgd_memslot); - /* Find an unused range of virtual page addresses of at least + /* + * Find an unused range of virtual page addresses of at least * pages in length. */ vm_vaddr_t vaddr_start = vm_vaddr_unused_gap(vm, sz, vaddr_min); @@ -991,7 +911,8 @@ void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, } } -/* Address VM Physical to Host Virtual +/* + * Address VM Physical to Host Virtual * * Input Args: * vm - Virtual Machine @@ -1023,7 +944,8 @@ void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa) return NULL; } -/* Address Host Virtual to VM Physical +/* + * Address Host Virtual to VM Physical * * Input Args: * vm - Virtual Machine @@ -1057,7 +979,8 @@ vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva) return -1; } -/* VM Create IRQ Chip +/* + * VM Create IRQ Chip * * Input Args: * vm - Virtual Machine @@ -1079,7 +1002,8 @@ void vm_create_irqchip(struct kvm_vm *vm) vm->has_irqchip = true; } -/* VM VCPU State +/* + * VM VCPU State * * Input Args: * vm - Virtual Machine @@ -1101,7 +1025,8 @@ struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid) return vcpu->state; } -/* VM VCPU Run +/* + * VM VCPU Run * * Input Args: * vm - Virtual Machine @@ -1127,13 +1052,14 @@ int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid) int rc; TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); - do { + do { rc = ioctl(vcpu->fd, KVM_RUN, NULL); } while (rc == -1 && errno == EINTR); return rc; } -/* VM VCPU Set MP State +/* + * VM VCPU Set MP State * * Input Args: * vm - Virtual Machine @@ -1148,7 +1074,7 @@ int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid) * by mp_state. */ void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid, - struct kvm_mp_state *mp_state) + struct kvm_mp_state *mp_state) { struct vcpu *vcpu = vcpu_find(vm, vcpuid); int ret; @@ -1160,7 +1086,8 @@ void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid, "rc: %i errno: %i", ret, errno); } -/* VM VCPU Regs Get +/* + * VM VCPU Regs Get * * Input Args: * vm - Virtual Machine @@ -1174,21 +1101,20 @@ void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid, * Obtains the current register state for the VCPU specified by vcpuid * and stores it at the location given by regs. */ -void vcpu_regs_get(struct kvm_vm *vm, - uint32_t vcpuid, struct kvm_regs *regs) +void vcpu_regs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs) { struct vcpu *vcpu = vcpu_find(vm, vcpuid); int ret; TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); - /* Get the regs. */ ret = ioctl(vcpu->fd, KVM_GET_REGS, regs); TEST_ASSERT(ret == 0, "KVM_GET_REGS failed, rc: %i errno: %i", ret, errno); } -/* VM VCPU Regs Set +/* + * VM VCPU Regs Set * * Input Args: * vm - Virtual Machine @@ -1202,165 +1128,46 @@ void vcpu_regs_get(struct kvm_vm *vm, * Sets the regs of the VCPU specified by vcpuid to the values * given by regs. */ -void vcpu_regs_set(struct kvm_vm *vm, - uint32_t vcpuid, struct kvm_regs *regs) +void vcpu_regs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs) { struct vcpu *vcpu = vcpu_find(vm, vcpuid); int ret; TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); - /* Set the regs. */ ret = ioctl(vcpu->fd, KVM_SET_REGS, regs); TEST_ASSERT(ret == 0, "KVM_SET_REGS failed, rc: %i errno: %i", ret, errno); } void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid, - struct kvm_vcpu_events *events) + struct kvm_vcpu_events *events) { struct vcpu *vcpu = vcpu_find(vm, vcpuid); int ret; TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); - /* Get the regs. */ ret = ioctl(vcpu->fd, KVM_GET_VCPU_EVENTS, events); TEST_ASSERT(ret == 0, "KVM_GET_VCPU_EVENTS, failed, rc: %i errno: %i", ret, errno); } void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid, - struct kvm_vcpu_events *events) + struct kvm_vcpu_events *events) { struct vcpu *vcpu = vcpu_find(vm, vcpuid); int ret; TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); - /* Set the regs. */ ret = ioctl(vcpu->fd, KVM_SET_VCPU_EVENTS, events); TEST_ASSERT(ret == 0, "KVM_SET_VCPU_EVENTS, failed, rc: %i errno: %i", ret, errno); } -/* VCPU Get MSR - * - * Input Args: - * vm - Virtual Machine - * vcpuid - VCPU ID - * msr_index - Index of MSR - * - * Output Args: None - * - * Return: On success, value of the MSR. On failure a TEST_ASSERT is produced. - * - * Get value of MSR for VCPU. - */ -uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index) -{ - struct vcpu *vcpu = vcpu_find(vm, vcpuid); - struct { - struct kvm_msrs header; - struct kvm_msr_entry entry; - } buffer = {}; - int r; - - TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); - buffer.header.nmsrs = 1; - buffer.entry.index = msr_index; - r = ioctl(vcpu->fd, KVM_GET_MSRS, &buffer.header); - TEST_ASSERT(r == 1, "KVM_GET_MSRS IOCTL failed,\n" - " rc: %i errno: %i", r, errno); - - return buffer.entry.data; -} - -/* VCPU Set MSR - * - * Input Args: - * vm - Virtual Machine - * vcpuid - VCPU ID - * msr_index - Index of MSR - * msr_value - New value of MSR - * - * Output Args: None - * - * Return: On success, nothing. On failure a TEST_ASSERT is produced. - * - * Set value of MSR for VCPU. - */ -void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index, - uint64_t msr_value) -{ - struct vcpu *vcpu = vcpu_find(vm, vcpuid); - struct { - struct kvm_msrs header; - struct kvm_msr_entry entry; - } buffer = {}; - int r; - - TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); - memset(&buffer, 0, sizeof(buffer)); - buffer.header.nmsrs = 1; - buffer.entry.index = msr_index; - buffer.entry.data = msr_value; - r = ioctl(vcpu->fd, KVM_SET_MSRS, &buffer.header); - TEST_ASSERT(r == 1, "KVM_SET_MSRS IOCTL failed,\n" - " rc: %i errno: %i", r, errno); -} - -/* VM VCPU Args Set - * - * Input Args: - * vm - Virtual Machine - * vcpuid - VCPU ID - * num - number of arguments - * ... - arguments, each of type uint64_t - * - * Output Args: None - * - * Return: None - * - * Sets the first num function input arguments to the values - * given as variable args. Each of the variable args is expected to - * be of type uint64_t. - */ -void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...) -{ - va_list ap; - struct kvm_regs regs; - - TEST_ASSERT(num >= 1 && num <= 6, "Unsupported number of args,\n" - " num: %u\n", - num); - - va_start(ap, num); - vcpu_regs_get(vm, vcpuid, ®s); - - if (num >= 1) - regs.rdi = va_arg(ap, uint64_t); - - if (num >= 2) - regs.rsi = va_arg(ap, uint64_t); - - if (num >= 3) - regs.rdx = va_arg(ap, uint64_t); - - if (num >= 4) - regs.rcx = va_arg(ap, uint64_t); - - if (num >= 5) - regs.r8 = va_arg(ap, uint64_t); - - if (num >= 6) - regs.r9 = va_arg(ap, uint64_t); - - vcpu_regs_set(vm, vcpuid, ®s); - va_end(ap); -} - -/* VM VCPU System Regs Get +/* + * VM VCPU System Regs Get * * Input Args: * vm - Virtual Machine @@ -1374,22 +1181,20 @@ void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...) * Obtains the current system register state for the VCPU specified by * vcpuid and stores it at the location given by sregs. */ -void vcpu_sregs_get(struct kvm_vm *vm, - uint32_t vcpuid, struct kvm_sregs *sregs) +void vcpu_sregs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs) { struct vcpu *vcpu = vcpu_find(vm, vcpuid); int ret; TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); - /* Get the regs. */ - /* Get the regs. */ ret = ioctl(vcpu->fd, KVM_GET_SREGS, sregs); TEST_ASSERT(ret == 0, "KVM_GET_SREGS failed, rc: %i errno: %i", ret, errno); } -/* VM VCPU System Regs Set +/* + * VM VCPU System Regs Set * * Input Args: * vm - Virtual Machine @@ -1403,27 +1208,25 @@ void vcpu_sregs_get(struct kvm_vm *vm, * Sets the system regs of the VCPU specified by vcpuid to the values * given by sregs. */ -void vcpu_sregs_set(struct kvm_vm *vm, - uint32_t vcpuid, struct kvm_sregs *sregs) +void vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs) { int ret = _vcpu_sregs_set(vm, vcpuid, sregs); TEST_ASSERT(ret == 0, "KVM_RUN IOCTL failed, " "rc: %i errno: %i", ret, errno); } -int _vcpu_sregs_set(struct kvm_vm *vm, - uint32_t vcpuid, struct kvm_sregs *sregs) +int _vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs) { struct vcpu *vcpu = vcpu_find(vm, vcpuid); int ret; TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); - /* Get the regs. */ return ioctl(vcpu->fd, KVM_SET_SREGS, sregs); } -/* VCPU Ioctl +/* + * VCPU Ioctl * * Input Args: * vm - Virtual Machine @@ -1435,8 +1238,8 @@ int _vcpu_sregs_set(struct kvm_vm *vm, * * Issues an arbitrary ioctl on a VCPU fd. */ -void vcpu_ioctl(struct kvm_vm *vm, - uint32_t vcpuid, unsigned long cmd, void *arg) +void vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, + unsigned long cmd, void *arg) { struct vcpu *vcpu = vcpu_find(vm, vcpuid); int ret; @@ -1448,7 +1251,8 @@ void vcpu_ioctl(struct kvm_vm *vm, cmd, ret, errno, strerror(errno)); } -/* VM Ioctl +/* + * VM Ioctl * * Input Args: * vm - Virtual Machine @@ -1468,7 +1272,8 @@ void vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg) cmd, ret, errno, strerror(errno)); } -/* VM Dump +/* + * VM Dump * * Input Args: * vm - Virtual Machine @@ -1515,38 +1320,6 @@ void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) vcpu_dump(stream, vm, vcpu->id, indent + 2); } -/* VM VCPU Dump - * - * Input Args: - * vm - Virtual Machine - * vcpuid - VCPU ID - * indent - Left margin indent amount - * - * Output Args: - * stream - Output FILE stream - * - * Return: None - * - * Dumps the current state of the VCPU specified by vcpuid, within the VM - * given by vm, to the FILE stream given by stream. - */ -void vcpu_dump(FILE *stream, struct kvm_vm *vm, - uint32_t vcpuid, uint8_t indent) -{ - struct kvm_regs regs; - struct kvm_sregs sregs; - - fprintf(stream, "%*scpuid: %u\n", indent, "", vcpuid); - - fprintf(stream, "%*sregs:\n", indent + 2, ""); - vcpu_regs_get(vm, vcpuid, ®s); - regs_dump(stream, ®s, indent + 4); - - fprintf(stream, "%*ssregs:\n", indent + 2, ""); - vcpu_sregs_get(vm, vcpuid, &sregs); - sregs_dump(stream, &sregs, indent + 4); -} - /* Known KVM exit reasons */ static struct exit_reason { unsigned int reason; @@ -1577,7 +1350,8 @@ static struct exit_reason { #endif }; -/* Exit Reason String +/* + * Exit Reason String * * Input Args: * exit_reason - Exit reason @@ -1603,7 +1377,8 @@ const char *exit_reason_str(unsigned int exit_reason) return "Unknown"; } -/* Physical Page Allocate +/* + * Physical Page Allocate * * Input Args: * vm - Virtual Machine @@ -1620,8 +1395,8 @@ const char *exit_reason_str(unsigned int exit_reason) * and its address is returned. A TEST_ASSERT failure occurs if no * page is available at or above paddr_min. */ -vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, - vm_paddr_t paddr_min, uint32_t memslot) +vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min, + uint32_t memslot) { struct userspace_mem_region *region; sparsebit_idx_t pg; @@ -1631,17 +1406,15 @@ vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, " paddr_min: 0x%lx page_size: 0x%x", paddr_min, vm->page_size); - /* Locate memory region. */ region = memslot2region(vm, memslot); - - /* Locate next available physical page at or above paddr_min. */ pg = paddr_min >> vm->page_shift; + /* Locate next available physical page at or above paddr_min. */ if (!sparsebit_is_set(region->unused_phy_pages, pg)) { pg = sparsebit_next_set(region->unused_phy_pages, pg); if (pg == 0) { fprintf(stderr, "No guest physical page available, " - "paddr_min: 0x%lx page_size: 0x%x memslot: %u", + "paddr_min: 0x%lx page_size: 0x%x memslot: %u\n", paddr_min, vm->page_size, memslot); fputs("---- vm dump ----\n", stderr); vm_dump(stderr, vm, 2); @@ -1655,7 +1428,8 @@ vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, return pg * vm->page_size; } -/* Address Guest Virtual to Host Virtual +/* + * Address Guest Virtual to Host Virtual * * Input Args: * vm - Virtual Machine diff --git a/tools/testing/selftests/kvm/lib/kvm_util_internal.h b/tools/testing/selftests/kvm/lib/kvm_util_internal.h index 36367be0fe26..3702c560bbf2 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util_internal.h +++ b/tools/testing/selftests/kvm/lib/kvm_util_internal.h @@ -11,18 +11,19 @@ #include "sparsebit.h" +#define KVM_DEV_PATH "/dev/kvm" + #ifndef BITS_PER_BYTE -#define BITS_PER_BYTE 8 +#define BITS_PER_BYTE 8 #endif #ifndef BITS_PER_LONG -#define BITS_PER_LONG (BITS_PER_BYTE * sizeof(long)) +#define BITS_PER_LONG (BITS_PER_BYTE * sizeof(long)) #endif #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) -#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_LONG) +#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_LONG) -/* Concrete definition of struct kvm_vm. */ struct userspace_mem_region { struct userspace_mem_region *next, *prev; struct kvm_userspace_memory_region region; @@ -53,7 +54,6 @@ struct kvm_vm { struct userspace_mem_region *userspace_mem_region_head; struct sparsebit *vpages_valid; struct sparsebit *vpages_mapped; - bool has_irqchip; bool pgd_created; vm_paddr_t pgd; @@ -61,13 +61,11 @@ struct kvm_vm { vm_vaddr_t tss; }; -struct vcpu *vcpu_find(struct kvm_vm *vm, - uint32_t vcpuid); -void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_memslot); +struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid); +void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, + int gdt_memslot); void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent); -void regs_dump(FILE *stream, struct kvm_regs *regs, - uint8_t indent); -void sregs_dump(FILE *stream, struct kvm_sregs *sregs, - uint8_t indent); +void regs_dump(FILE *stream, struct kvm_regs *regs, uint8_t indent); +void sregs_dump(FILE *stream, struct kvm_sregs *sregs, uint8_t indent); #endif /* SELFTEST_KVM_UTIL_INTERNAL_H */ diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index 79c2c5c203c0..67f84fbb0ca4 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -672,6 +672,102 @@ void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code) vcpu_set_mp_state(vm, vcpuid, &mp_state); } +/* Allocate an instance of struct kvm_cpuid2 + * + * Input Args: None + * + * Output Args: None + * + * Return: A pointer to the allocated struct. The caller is responsible + * for freeing this struct. + * + * Since kvm_cpuid2 uses a 0-length array to allow a the size of the + * array to be decided at allocation time, allocation is slightly + * complicated. This function uses a reasonable default length for + * the array and performs the appropriate allocation. + */ +static struct kvm_cpuid2 *allocate_kvm_cpuid2(void) +{ + struct kvm_cpuid2 *cpuid; + int nent = 100; + size_t size; + + size = sizeof(*cpuid); + size += nent * sizeof(struct kvm_cpuid_entry2); + cpuid = malloc(size); + if (!cpuid) { + perror("malloc"); + abort(); + } + + cpuid->nent = nent; + + return cpuid; +} + +/* KVM Supported CPUID Get + * + * Input Args: None + * + * Output Args: + * + * Return: The supported KVM CPUID + * + * Get the guest CPUID supported by KVM. + */ +struct kvm_cpuid2 *kvm_get_supported_cpuid(void) +{ + static struct kvm_cpuid2 *cpuid; + int ret; + int kvm_fd; + + if (cpuid) + return cpuid; + + cpuid = allocate_kvm_cpuid2(); + kvm_fd = open(KVM_DEV_PATH, O_RDONLY); + if (kvm_fd < 0) + exit(KSFT_SKIP); + + ret = ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, cpuid); + TEST_ASSERT(ret == 0, "KVM_GET_SUPPORTED_CPUID failed %d %d\n", + ret, errno); + + close(kvm_fd); + return cpuid; +} + +/* Locate a cpuid entry. + * + * Input Args: + * cpuid: The cpuid. + * function: The function of the cpuid entry to find. + * + * Output Args: None + * + * Return: A pointer to the cpuid entry. Never returns NULL. + */ +struct kvm_cpuid_entry2 * +kvm_get_supported_cpuid_index(uint32_t function, uint32_t index) +{ + struct kvm_cpuid2 *cpuid; + struct kvm_cpuid_entry2 *entry = NULL; + int i; + + cpuid = kvm_get_supported_cpuid(); + for (i = 0; i < cpuid->nent; i++) { + if (cpuid->entries[i].function == function && + cpuid->entries[i].index == index) { + entry = &cpuid->entries[i]; + break; + } + } + + TEST_ASSERT(entry, "Guest CPUID entry not found: (EAX=%x, ECX=%x).", + function, index); + return entry; +} + /* VM VCPU CPUID Set * * Input Args: @@ -698,6 +794,7 @@ void vcpu_set_cpuid(struct kvm_vm *vm, rc, errno); } + /* Create a VM with reasonable defaults * * Input Args: @@ -742,6 +839,154 @@ struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages, return vm; } +/* VCPU Get MSR + * + * Input Args: + * vm - Virtual Machine + * vcpuid - VCPU ID + * msr_index - Index of MSR + * + * Output Args: None + * + * Return: On success, value of the MSR. On failure a TEST_ASSERT is produced. + * + * Get value of MSR for VCPU. + */ +uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index) +{ + struct vcpu *vcpu = vcpu_find(vm, vcpuid); + struct { + struct kvm_msrs header; + struct kvm_msr_entry entry; + } buffer = {}; + int r; + + TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); + buffer.header.nmsrs = 1; + buffer.entry.index = msr_index; + r = ioctl(vcpu->fd, KVM_GET_MSRS, &buffer.header); + TEST_ASSERT(r == 1, "KVM_GET_MSRS IOCTL failed,\n" + " rc: %i errno: %i", r, errno); + + return buffer.entry.data; +} + +/* VCPU Set MSR + * + * Input Args: + * vm - Virtual Machine + * vcpuid - VCPU ID + * msr_index - Index of MSR + * msr_value - New value of MSR + * + * Output Args: None + * + * Return: On success, nothing. On failure a TEST_ASSERT is produced. + * + * Set value of MSR for VCPU. + */ +void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index, + uint64_t msr_value) +{ + struct vcpu *vcpu = vcpu_find(vm, vcpuid); + struct { + struct kvm_msrs header; + struct kvm_msr_entry entry; + } buffer = {}; + int r; + + TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); + memset(&buffer, 0, sizeof(buffer)); + buffer.header.nmsrs = 1; + buffer.entry.index = msr_index; + buffer.entry.data = msr_value; + r = ioctl(vcpu->fd, KVM_SET_MSRS, &buffer.header); + TEST_ASSERT(r == 1, "KVM_SET_MSRS IOCTL failed,\n" + " rc: %i errno: %i", r, errno); +} + +/* VM VCPU Args Set + * + * Input Args: + * vm - Virtual Machine + * vcpuid - VCPU ID + * num - number of arguments + * ... - arguments, each of type uint64_t + * + * Output Args: None + * + * Return: None + * + * Sets the first num function input arguments to the values + * given as variable args. Each of the variable args is expected to + * be of type uint64_t. + */ +void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...) +{ + va_list ap; + struct kvm_regs regs; + + TEST_ASSERT(num >= 1 && num <= 6, "Unsupported number of args,\n" + " num: %u\n", + num); + + va_start(ap, num); + vcpu_regs_get(vm, vcpuid, ®s); + + if (num >= 1) + regs.rdi = va_arg(ap, uint64_t); + + if (num >= 2) + regs.rsi = va_arg(ap, uint64_t); + + if (num >= 3) + regs.rdx = va_arg(ap, uint64_t); + + if (num >= 4) + regs.rcx = va_arg(ap, uint64_t); + + if (num >= 5) + regs.r8 = va_arg(ap, uint64_t); + + if (num >= 6) + regs.r9 = va_arg(ap, uint64_t); + + vcpu_regs_set(vm, vcpuid, ®s); + va_end(ap); +} + +/* + * VM VCPU Dump + * + * Input Args: + * vm - Virtual Machine + * vcpuid - VCPU ID + * indent - Left margin indent amount + * + * Output Args: + * stream - Output FILE stream + * + * Return: None + * + * Dumps the current state of the VCPU specified by vcpuid, within the VM + * given by vm, to the FILE stream given by stream. + */ +void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent) +{ + struct kvm_regs regs; + struct kvm_sregs sregs; + + fprintf(stream, "%*scpuid: %u\n", indent, "", vcpuid); + + fprintf(stream, "%*sregs:\n", indent + 2, ""); + vcpu_regs_get(vm, vcpuid, ®s); + regs_dump(stream, ®s, indent + 4); + + fprintf(stream, "%*ssregs:\n", indent + 2, ""); + vcpu_sregs_get(vm, vcpuid, &sregs); + sregs_dump(stream, &sregs, indent + 4); +} + struct kvm_x86_state { struct kvm_vcpu_events events; struct kvm_mp_state mp_state; diff --git a/tools/testing/selftests/kvm/x86_64/dirty_log_test.c b/tools/testing/selftests/kvm/x86_64/dirty_log_test.c index 7cf3e4ae6046..395a6e07d37c 100644 --- a/tools/testing/selftests/kvm/x86_64/dirty_log_test.c +++ b/tools/testing/selftests/kvm/x86_64/dirty_log_test.c @@ -15,6 +15,7 @@ #include "test_util.h" #include "kvm_util.h" +#include "processor.h" #define DEBUG printf -- cgit v1.2.3 From d5106539cfef362e3870b6aa8b5ca587cd67e93d Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 18 Sep 2018 19:54:29 +0200 Subject: kvm: selftests: add vm_phy_pages_alloc Signed-off-by: Andrew Jones Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/include/kvm_util.h | 2 + tools/testing/selftests/kvm/lib/kvm_util.c | 60 ++++++++++++++++---------- 2 files changed, 39 insertions(+), 23 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 4d5e405ff246..e0c9cf4d1c49 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -110,6 +110,8 @@ void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, uint32_t pgd_memslot); vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min, uint32_t memslot); +vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num, + vm_paddr_t paddr_min, uint32_t memslot); struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_size, void *guest_code); diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 1a8f8f9e3635..26c3c4350a88 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -1378,10 +1378,11 @@ const char *exit_reason_str(unsigned int exit_reason) } /* - * Physical Page Allocate + * Physical Contiguous Page Allocator * * Input Args: * vm - Virtual Machine + * num - number of pages * paddr_min - Physical address minimum * memslot - Memory region to allocate page from * @@ -1390,16 +1391,18 @@ const char *exit_reason_str(unsigned int exit_reason) * Return: * Starting physical address * - * Within the VM specified by vm, locates an available physical page - * at or above paddr_min. If found, the page is marked as in use - * and its address is returned. A TEST_ASSERT failure occurs if no - * page is available at or above paddr_min. + * Within the VM specified by vm, locates a range of available physical + * pages at or above paddr_min. If found, the pages are marked as in use + * and thier base address is returned. A TEST_ASSERT failure occurs if + * not enough pages are available at or above paddr_min. */ -vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min, - uint32_t memslot) +vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num, + vm_paddr_t paddr_min, uint32_t memslot) { struct userspace_mem_region *region; - sparsebit_idx_t pg; + sparsebit_idx_t pg, base; + + TEST_ASSERT(num > 0, "Must allocate at least one page"); TEST_ASSERT((paddr_min % vm->page_size) == 0, "Min physical address " "not divisible by page size.\n" @@ -1407,25 +1410,36 @@ vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min, paddr_min, vm->page_size); region = memslot2region(vm, memslot); - pg = paddr_min >> vm->page_shift; - - /* Locate next available physical page at or above paddr_min. */ - if (!sparsebit_is_set(region->unused_phy_pages, pg)) { - pg = sparsebit_next_set(region->unused_phy_pages, pg); - if (pg == 0) { - fprintf(stderr, "No guest physical page available, " - "paddr_min: 0x%lx page_size: 0x%x memslot: %u\n", - paddr_min, vm->page_size, memslot); - fputs("---- vm dump ----\n", stderr); - vm_dump(stderr, vm, 2); - abort(); + base = pg = paddr_min >> vm->page_shift; + + do { + for (; pg < base + num; ++pg) { + if (!sparsebit_is_set(region->unused_phy_pages, pg)) { + base = pg = sparsebit_next_set(region->unused_phy_pages, pg); + break; + } } + } while (pg && pg != base + num); + + if (pg == 0) { + fprintf(stderr, "No guest physical page available, " + "paddr_min: 0x%lx page_size: 0x%x memslot: %u\n", + paddr_min, vm->page_size, memslot); + fputs("---- vm dump ----\n", stderr); + vm_dump(stderr, vm, 2); + abort(); } - /* Specify page as in use and return its address. */ - sparsebit_clear(region->unused_phy_pages, pg); + for (pg = base; pg < base + num; ++pg) + sparsebit_clear(region->unused_phy_pages, pg); + + return base * vm->page_size; +} - return pg * vm->page_size; +vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min, + uint32_t memslot) +{ + return vm_phy_pages_alloc(vm, 1, paddr_min, memslot); } /* -- cgit v1.2.3 From 7a6629ef746d1a50cc9247bd7c92ce6b38b8ed68 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 18 Sep 2018 19:54:30 +0200 Subject: kvm: selftests: add virt mem support for aarch64 Signed-off-by: Andrew Jones Signed-off-by: Paolo Bonzini --- .../testing/selftests/kvm/lib/aarch64/processor.c | 216 +++++++++++++++++++++ tools/testing/selftests/kvm/lib/kvm_util.c | 1 + .../testing/selftests/kvm/lib/kvm_util_internal.h | 1 + 3 files changed, 218 insertions(+) create mode 100644 tools/testing/selftests/kvm/lib/aarch64/processor.c (limited to 'tools') diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c new file mode 100644 index 000000000000..464d4d074249 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c @@ -0,0 +1,216 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * AArch64 code + * + * Copyright (C) 2018, Red Hat, Inc. + */ + +#include "kvm_util.h" +#include "../kvm_util_internal.h" + +#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000 + +static uint64_t page_align(struct kvm_vm *vm, uint64_t v) +{ + return (v + vm->page_size) & ~(vm->page_size - 1); +} + +static uint64_t pgd_index(struct kvm_vm *vm, vm_vaddr_t gva) +{ + unsigned int shift = (vm->pgtable_levels - 1) * (vm->page_shift - 3) + vm->page_shift; + uint64_t mask = (1UL << (vm->va_bits - shift)) - 1; + + return (gva >> shift) & mask; +} + +static uint64_t pud_index(struct kvm_vm *vm, vm_vaddr_t gva) +{ + unsigned int shift = 2 * (vm->page_shift - 3) + vm->page_shift; + uint64_t mask = (1UL << (vm->page_shift - 3)) - 1; + + TEST_ASSERT(vm->pgtable_levels == 4, + "Mode %d does not have 4 page table levels", vm->mode); + + return (gva >> shift) & mask; +} + +static uint64_t pmd_index(struct kvm_vm *vm, vm_vaddr_t gva) +{ + unsigned int shift = (vm->page_shift - 3) + vm->page_shift; + uint64_t mask = (1UL << (vm->page_shift - 3)) - 1; + + TEST_ASSERT(vm->pgtable_levels >= 3, + "Mode %d does not have >= 3 page table levels", vm->mode); + + return (gva >> shift) & mask; +} + +static uint64_t pte_index(struct kvm_vm *vm, vm_vaddr_t gva) +{ + uint64_t mask = (1UL << (vm->page_shift - 3)) - 1; + return (gva >> vm->page_shift) & mask; +} + +static uint64_t pte_addr(struct kvm_vm *vm, uint64_t entry) +{ + uint64_t mask = ((1UL << (vm->va_bits - vm->page_shift)) - 1) << vm->page_shift; + return entry & mask; +} + +static uint64_t ptrs_per_pgd(struct kvm_vm *vm) +{ + unsigned int shift = (vm->pgtable_levels - 1) * (vm->page_shift - 3) + vm->page_shift; + return 1 << (vm->va_bits - shift); +} + +static uint64_t ptrs_per_pte(struct kvm_vm *vm) +{ + return 1 << (vm->page_shift - 3); +} + +void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot) +{ + int rc; + + if (!vm->pgd_created) { + vm_paddr_t paddr = vm_phy_pages_alloc(vm, + page_align(vm, ptrs_per_pgd(vm) * 8) / vm->page_size, + KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot); + vm->pgd = paddr; + vm->pgd_created = true; + } +} + +void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, + uint32_t pgd_memslot, uint64_t flags) +{ + uint8_t attr_idx = flags & 7; + uint64_t *ptep; + + TEST_ASSERT((vaddr % vm->page_size) == 0, + "Virtual address not on page boundary,\n" + " vaddr: 0x%lx vm->page_size: 0x%x", vaddr, vm->page_size); + TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, + (vaddr >> vm->page_shift)), + "Invalid virtual address, vaddr: 0x%lx", vaddr); + TEST_ASSERT((paddr % vm->page_size) == 0, + "Physical address not on page boundary,\n" + " paddr: 0x%lx vm->page_size: 0x%x", paddr, vm->page_size); + TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn, + "Physical address beyond beyond maximum supported,\n" + " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", + paddr, vm->max_gfn, vm->page_size); + + ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, vaddr) * 8; + if (!*ptep) { + *ptep = vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot); + *ptep |= 3; + } + + switch (vm->pgtable_levels) { + case 4: + ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, vaddr) * 8; + if (!*ptep) { + *ptep = vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot); + *ptep |= 3; + } + /* fall through */ + case 3: + ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, vaddr) * 8; + if (!*ptep) { + *ptep = vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot); + *ptep |= 3; + } + /* fall through */ + case 2: + ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, vaddr) * 8; + break; + default: + TEST_ASSERT(false, "Page table levels must be 2, 3, or 4"); + } + + *ptep = paddr | 3; + *ptep |= (attr_idx << 2) | (1 << 10) /* Access Flag */; +} + +void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, + uint32_t pgd_memslot) +{ + uint64_t attr_idx = 4; /* NORMAL (See DEFAULT_MAIR_EL1) */ + + _virt_pg_map(vm, vaddr, paddr, pgd_memslot, attr_idx); +} + +vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) +{ + uint64_t *ptep; + + if (!vm->pgd_created) + goto unmapped_gva; + + ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, gva) * 8; + if (!ptep) + goto unmapped_gva; + + switch (vm->pgtable_levels) { + case 4: + ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, gva) * 8; + if (!ptep) + goto unmapped_gva; + /* fall through */ + case 3: + ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, gva) * 8; + if (!ptep) + goto unmapped_gva; + /* fall through */ + case 2: + ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, gva) * 8; + if (!ptep) + goto unmapped_gva; + break; + default: + TEST_ASSERT(false, "Page table levels must be 2, 3, or 4"); + } + + return pte_addr(vm, *ptep) + (gva & (vm->page_size - 1)); + +unmapped_gva: + TEST_ASSERT(false, "No mapping for vm virtual address, " + "gva: 0x%lx", gva); +} + +static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, uint64_t page, int level) +{ +#ifdef DEBUG_VM + static const char * const type[] = { "", "pud", "pmd", "pte" }; + uint64_t pte, *ptep; + + if (level == 4) + return; + + for (pte = page; pte < page + ptrs_per_pte(vm) * 8; pte += 8) { + ptep = addr_gpa2hva(vm, pte); + if (!*ptep) + continue; + printf("%*s%s: %lx: %lx at %p\n", indent, "", type[level], pte, *ptep, ptep); + pte_dump(stream, vm, indent + 1, pte_addr(vm, *ptep), level + 1); + } +#endif +} + +void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) +{ + int level = 4 - (vm->pgtable_levels - 1); + uint64_t pgd, *ptep; + + if (!vm->pgd_created) + return; + + for (pgd = vm->pgd; pgd < vm->pgd + ptrs_per_pgd(vm) * 8; pgd += 8) { + ptep = addr_gpa2hva(vm, pgd); + if (!*ptep) + continue; + printf("%*spgd: %lx: %lx at %p\n", indent, "", pgd, *ptep, ptep); + pte_dump(stream, vm, indent + 1, pte_addr(vm, *ptep), level); + } +} diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 26c3c4350a88..fd346fb82e46 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -129,6 +129,7 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) /* Setup mode specific traits. */ switch (vm->mode) { case VM_MODE_FLAT48PG: + vm->pgtable_levels = 4; vm->page_size = 0x1000; vm->page_shift = 12; vm->va_bits = 48; diff --git a/tools/testing/selftests/kvm/lib/kvm_util_internal.h b/tools/testing/selftests/kvm/lib/kvm_util_internal.h index 3702c560bbf2..d048d7363d66 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util_internal.h +++ b/tools/testing/selftests/kvm/lib/kvm_util_internal.h @@ -46,6 +46,7 @@ struct kvm_vm { int mode; int kvm_fd; int fd; + unsigned int pgtable_levels; unsigned int page_size; unsigned int page_shift; unsigned int va_bits; -- cgit v1.2.3 From 0bec140fb6c106a70aac183e5fa99de0ef1d161a Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 18 Sep 2018 19:54:31 +0200 Subject: kvm: selftests: add vcpu support for aarch64 This code adds VM and VCPU setup code for the VM_MODE_FLAT48PG mode. The VM_MODE_FLAT48PG isn't yet fully supportable, as it defines the guest physical address limit as 52-bits, and KVM currently only supports guests with up to 40-bit physical addresses (see KVM_PHYS_SHIFT). VM_MODE_FLAT48PG will work fine, though, as long as no >= 40-bit physical addresses are used. Signed-off-by: Andrew Jones Signed-off-by: Paolo Bonzini --- .../selftests/kvm/include/aarch64/processor.h | 55 ++++++++++++++ .../testing/selftests/kvm/lib/aarch64/processor.c | 83 ++++++++++++++++++++++ 2 files changed, 138 insertions(+) create mode 100644 tools/testing/selftests/kvm/include/aarch64/processor.h (limited to 'tools') diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/aarch64/processor.h new file mode 100644 index 000000000000..9ef2ab1a0c08 --- /dev/null +++ b/tools/testing/selftests/kvm/include/aarch64/processor.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * AArch64 processor specific defines + * + * Copyright (C) 2018, Red Hat, Inc. + */ +#ifndef SELFTEST_KVM_PROCESSOR_H +#define SELFTEST_KVM_PROCESSOR_H + +#include "kvm_util.h" + + +#define ARM64_CORE_REG(x) (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \ + KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x)) + +#define CPACR_EL1 3, 0, 1, 0, 2 +#define TCR_EL1 3, 0, 2, 0, 2 +#define MAIR_EL1 3, 0, 10, 2, 0 +#define TTBR0_EL1 3, 0, 2, 0, 0 +#define SCTLR_EL1 3, 0, 1, 0, 0 + +/* + * Default MAIR + * index attribute + * DEVICE_nGnRnE 0 0000:0000 + * DEVICE_nGnRE 1 0000:0100 + * DEVICE_GRE 2 0000:1100 + * NORMAL_NC 3 0100:0100 + * NORMAL 4 1111:1111 + * NORMAL_WT 5 1011:1011 + */ +#define DEFAULT_MAIR_EL1 ((0x00ul << (0 * 8)) | \ + (0x04ul << (1 * 8)) | \ + (0x0cul << (2 * 8)) | \ + (0x44ul << (3 * 8)) | \ + (0xfful << (4 * 8)) | \ + (0xbbul << (5 * 8))) + +static inline void get_reg(struct kvm_vm *vm, uint32_t vcpuid, uint64_t id, uint64_t *addr) +{ + struct kvm_one_reg reg; + reg.id = id; + reg.addr = (uint64_t)addr; + vcpu_ioctl(vm, vcpuid, KVM_GET_ONE_REG, ®); +} + +static inline void set_reg(struct kvm_vm *vm, uint32_t vcpuid, uint64_t id, uint64_t val) +{ + struct kvm_one_reg reg; + reg.id = id; + reg.addr = (uint64_t)&val; + vcpu_ioctl(vm, vcpuid, KVM_SET_ONE_REG, ®); +} + +#endif /* SELFTEST_KVM_PROCESSOR_H */ diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c index 464d4d074249..871fe2173679 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/processor.c +++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c @@ -5,10 +5,14 @@ * Copyright (C) 2018, Red Hat, Inc. */ +#define _GNU_SOURCE /* for program_invocation_name */ + #include "kvm_util.h" #include "../kvm_util_internal.h" +#include "processor.h" #define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000 +#define DEFAULT_ARM64_GUEST_STACK_VADDR_MIN 0xac0000 static uint64_t page_align(struct kvm_vm *vm, uint64_t v) { @@ -214,3 +218,82 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) pte_dump(stream, vm, indent + 1, pte_addr(vm, *ptep), level); } } + +struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages, + void *guest_code) +{ + uint64_t ptrs_per_4k_pte = 512; + uint64_t extra_pg_pages = (extra_mem_pages / ptrs_per_4k_pte) * 2; + struct kvm_vm *vm; + + vm = vm_create(VM_MODE_FLAT48PG, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR); + + kvm_vm_elf_load(vm, program_invocation_name, 0, 0); + vm_vcpu_add_default(vm, vcpuid, guest_code); + + return vm; +} + +void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code) +{ + size_t stack_size = vm->page_size == 4096 ? + DEFAULT_STACK_PGS * vm->page_size : + vm->page_size; + uint64_t stack_vaddr = vm_vaddr_alloc(vm, stack_size, + DEFAULT_ARM64_GUEST_STACK_VADDR_MIN, 0, 0); + + vm_vcpu_add(vm, vcpuid, 0, 0); + + set_reg(vm, vcpuid, ARM64_CORE_REG(sp_el1), stack_vaddr + stack_size); + set_reg(vm, vcpuid, ARM64_CORE_REG(regs.pc), (uint64_t)guest_code); +} + +void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_memslot) +{ + struct kvm_vcpu_init init; + uint64_t sctlr_el1, tcr_el1; + + memset(&init, 0, sizeof(init)); + init.target = KVM_ARM_TARGET_GENERIC_V8; + vcpu_ioctl(vm, vcpuid, KVM_ARM_VCPU_INIT, &init); + + /* + * Enable FP/ASIMD to avoid trapping when accessing Q0-Q15 + * registers, which the variable argument list macros do. + */ + set_reg(vm, vcpuid, ARM64_SYS_REG(CPACR_EL1), 3 << 20); + + get_reg(vm, vcpuid, ARM64_SYS_REG(SCTLR_EL1), &sctlr_el1); + get_reg(vm, vcpuid, ARM64_SYS_REG(TCR_EL1), &tcr_el1); + + switch (vm->mode) { + case VM_MODE_FLAT48PG: + tcr_el1 |= 0ul << 14; /* TG0 = 4KB */ + tcr_el1 |= 6ul << 32; /* IPS = 52 bits */ + break; + default: + TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", vm->mode); + } + + sctlr_el1 |= (1 << 0) | (1 << 2) | (1 << 12) /* M | C | I */; + /* TCR_EL1 |= IRGN0:WBWA | ORGN0:WBWA | SH0:Inner-Shareable */; + tcr_el1 |= (1 << 8) | (1 << 10) | (3 << 12); + tcr_el1 |= (64 - vm->va_bits) /* T0SZ */; + + set_reg(vm, vcpuid, ARM64_SYS_REG(SCTLR_EL1), sctlr_el1); + set_reg(vm, vcpuid, ARM64_SYS_REG(TCR_EL1), tcr_el1); + set_reg(vm, vcpuid, ARM64_SYS_REG(MAIR_EL1), DEFAULT_MAIR_EL1); + set_reg(vm, vcpuid, ARM64_SYS_REG(TTBR0_EL1), vm->pgd); +} + +void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent) +{ + uint64_t pstate, pc; + + get_reg(vm, vcpuid, ARM64_CORE_REG(regs.pstate), &pstate); + get_reg(vm, vcpuid, ARM64_CORE_REG(regs.pc), &pc); + + fprintf(stream, "%*spstate: 0x%.16llx pc: 0x%.16llx\n", + indent, "", pstate, pc); + +} -- cgit v1.2.3 From 81d1cca0c062a589953eef7e98f1e68b8222a918 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 18 Sep 2018 19:54:33 +0200 Subject: kvm: selftests: introduce new VM mode for 64K pages Rename VM_MODE_FLAT48PG to be more descriptive of its config and add a new config that has the same parameters, except with 64K pages. Signed-off-by: Andrew Jones Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/include/kvm_util.h | 7 +++- .../testing/selftests/kvm/lib/aarch64/processor.c | 8 +++- tools/testing/selftests/kvm/lib/kvm_util.c | 46 ++++++++++++++-------- .../testing/selftests/kvm/lib/kvm_util_internal.h | 1 + tools/testing/selftests/kvm/lib/x86_64/processor.c | 10 ++--- 5 files changed, 47 insertions(+), 25 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index e0c9cf4d1c49..88df26c41845 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -34,9 +34,14 @@ typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */ #define DEFAULT_STACK_PGS 5 enum vm_guest_mode { - VM_MODE_FLAT48PG, + VM_MODE_P52V48_4K, + VM_MODE_P52V48_64K, + NUM_VM_MODES, }; +#define vm_guest_mode_string(m) vm_guest_mode_string[m] +extern const char * const vm_guest_mode_string[]; + enum vm_mem_backing_src_type { VM_MEM_SRC_ANONYMOUS, VM_MEM_SRC_ANONYMOUS_THP, diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c index 871fe2173679..b1dfc0d4b68e 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/processor.c +++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c @@ -226,7 +226,7 @@ struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages, uint64_t extra_pg_pages = (extra_mem_pages / ptrs_per_4k_pte) * 2; struct kvm_vm *vm; - vm = vm_create(VM_MODE_FLAT48PG, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR); + vm = vm_create(VM_MODE_P52V48_4K, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR); kvm_vm_elf_load(vm, program_invocation_name, 0, 0); vm_vcpu_add_default(vm, vcpuid, guest_code); @@ -267,10 +267,14 @@ void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_memslot) get_reg(vm, vcpuid, ARM64_SYS_REG(TCR_EL1), &tcr_el1); switch (vm->mode) { - case VM_MODE_FLAT48PG: + case VM_MODE_P52V48_4K: tcr_el1 |= 0ul << 14; /* TG0 = 4KB */ tcr_el1 |= 6ul << 32; /* IPS = 52 bits */ break; + case VM_MODE_P52V48_64K: + tcr_el1 |= 1ul << 14; /* TG0 = 64KB */ + tcr_el1 |= 6ul << 32; /* IPS = 52 bits */ + break; default: TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", vm->mode); } diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index fd346fb82e46..b37e52f4daad 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -17,7 +17,7 @@ #include #define KVM_UTIL_PGS_PER_HUGEPG 512 -#define KVM_UTIL_MIN_PADDR 0x2000 +#define KVM_UTIL_MIN_PFN 2 /* Aligns x up to the next multiple of size. Size must be a power of 2. */ static void *align(void *x, size_t size) @@ -96,11 +96,16 @@ static void vm_open(struct kvm_vm *vm, int perm) "rc: %i errno: %i", vm->fd, errno); } +const char * const vm_guest_mode_string[] = { + "PA-bits:52, VA-bits:48, 4K pages", + "PA-bits:52, VA-bits:48, 64K pages", +}; + /* * VM Create * * Input Args: - * mode - VM Mode (e.g. VM_MODE_FLAT48PG) + * mode - VM Mode (e.g. VM_MODE_P52V48_4K) * phy_pages - Physical memory pages * perm - permission * @@ -109,7 +114,7 @@ static void vm_open(struct kvm_vm *vm, int perm) * Return: * Pointer to opaque structure that describes the created VM. * - * Creates a VM with the mode specified by mode (e.g. VM_MODE_FLAT48PG). + * Creates a VM with the mode specified by mode (e.g. VM_MODE_P52V48_4K). * When phy_pages is non-zero, a memory region of phy_pages physical pages * is created and mapped starting at guest physical address 0. The file * descriptor to control the created VM is created with the permissions @@ -128,28 +133,34 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) /* Setup mode specific traits. */ switch (vm->mode) { - case VM_MODE_FLAT48PG: + case VM_MODE_P52V48_4K: vm->pgtable_levels = 4; vm->page_size = 0x1000; vm->page_shift = 12; vm->va_bits = 48; - - /* Limit to 48-bit canonical virtual addresses. */ - vm->vpages_valid = sparsebit_alloc(); - sparsebit_set_num(vm->vpages_valid, - 0, (1ULL << (48 - 1)) >> vm->page_shift); - sparsebit_set_num(vm->vpages_valid, - (~((1ULL << (48 - 1)) - 1)) >> vm->page_shift, - (1ULL << (48 - 1)) >> vm->page_shift); - - /* Limit physical addresses to 52-bits. */ - vm->max_gfn = ((1ULL << 52) >> vm->page_shift) - 1; break; - + case VM_MODE_P52V48_64K: + vm->pgtable_levels = 3; + vm->pa_bits = 52; + vm->page_size = 0x10000; + vm->page_shift = 16; + vm->va_bits = 48; + break; default: TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", mode); } + /* Limit to VA-bit canonical virtual addresses. */ + vm->vpages_valid = sparsebit_alloc(); + sparsebit_set_num(vm->vpages_valid, + 0, (1ULL << (vm->va_bits - 1)) >> vm->page_shift); + sparsebit_set_num(vm->vpages_valid, + (~((1ULL << (vm->va_bits - 1)) - 1)) >> vm->page_shift, + (1ULL << (vm->va_bits - 1)) >> vm->page_shift); + + /* Limit physical addresses to PA-bits. */ + vm->max_gfn = ((1ULL << vm->pa_bits) >> vm->page_shift) - 1; + /* Allocate and setup memory for guest. */ vm->vpages_mapped = sparsebit_alloc(); if (phy_pages != 0) @@ -868,7 +879,8 @@ vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min, pages--, vaddr += vm->page_size) { vm_paddr_t paddr; - paddr = vm_phy_page_alloc(vm, KVM_UTIL_MIN_PADDR, data_memslot); + paddr = vm_phy_page_alloc(vm, + KVM_UTIL_MIN_PFN * vm->page_size, data_memslot); virt_pg_map(vm, vaddr, paddr, pgd_memslot); diff --git a/tools/testing/selftests/kvm/lib/kvm_util_internal.h b/tools/testing/selftests/kvm/lib/kvm_util_internal.h index d048d7363d66..52701db0f253 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util_internal.h +++ b/tools/testing/selftests/kvm/lib/kvm_util_internal.h @@ -49,6 +49,7 @@ struct kvm_vm { unsigned int pgtable_levels; unsigned int page_size; unsigned int page_shift; + unsigned int pa_bits; unsigned int va_bits; uint64_t max_gfn; struct vcpu *vcpu_head; diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index 67f84fbb0ca4..f28127f4a3af 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -231,7 +231,7 @@ void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot) { int rc; - TEST_ASSERT(vm->mode == VM_MODE_FLAT48PG, "Attempt to use " + TEST_ASSERT(vm->mode == VM_MODE_P52V48_4K, "Attempt to use " "unknown or unsupported guest mode, mode: 0x%x", vm->mode); /* If needed, create page map l4 table. */ @@ -264,7 +264,7 @@ void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, uint16_t index[4]; struct pageMapL4Entry *pml4e; - TEST_ASSERT(vm->mode == VM_MODE_FLAT48PG, "Attempt to use " + TEST_ASSERT(vm->mode == VM_MODE_P52V48_4K, "Attempt to use " "unknown or unsupported guest mode, mode: 0x%x", vm->mode); TEST_ASSERT((vaddr % vm->page_size) == 0, @@ -551,7 +551,7 @@ vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) struct pageTableEntry *pte; void *hva; - TEST_ASSERT(vm->mode == VM_MODE_FLAT48PG, "Attempt to use " + TEST_ASSERT(vm->mode == VM_MODE_P52V48_4K, "Attempt to use " "unknown or unsupported guest mode, mode: 0x%x", vm->mode); index[0] = (gva >> 12) & 0x1ffu; @@ -624,7 +624,7 @@ void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_memslot) kvm_setup_gdt(vm, &sregs.gdt, gdt_memslot, pgd_memslot); switch (vm->mode) { - case VM_MODE_FLAT48PG: + case VM_MODE_P52V48_4K: sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG; sregs.cr4 |= X86_CR4_PAE | X86_CR4_OSFXSR; sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX); @@ -823,7 +823,7 @@ struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages, uint64_t extra_pg_pages = extra_mem_pages / 512 * 2; /* Create VM */ - vm = vm_create(VM_MODE_FLAT48PG, + vm = vm_create(VM_MODE_P52V48_4K, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR); -- cgit v1.2.3 From fff8dcd7b4a242e3752d6f316967b02f933031d0 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 18 Sep 2018 19:54:32 +0200 Subject: kvm: selftests: port dirty_log_test to aarch64 While we're messing with the code for the port and to support guest page sizes that are less than the host page size, we also make some code formatting cleanups and apply sync_global_to_guest(). Signed-off-by: Andrew Jones Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/.gitignore | 2 +- tools/testing/selftests/kvm/Makefile | 5 +- tools/testing/selftests/kvm/dirty_log_test.c | 315 +++++++++++++++++++++ .../testing/selftests/kvm/x86_64/dirty_log_test.c | 308 -------------------- 4 files changed, 320 insertions(+), 310 deletions(-) create mode 100644 tools/testing/selftests/kvm/dirty_log_test.c delete mode 100644 tools/testing/selftests/kvm/x86_64/dirty_log_test.c (limited to 'tools') diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index e0aa5411cf9b..552b0d9a49f4 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -4,4 +4,4 @@ /x86_64/sync_regs_test /x86_64/vmx_tsc_adjust_test /x86_64/state_test -/x86_64/dirty_log_test +/dirty_log_test diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 959be146f789..5e23f97ec46f 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -5,6 +5,7 @@ UNAME_M := $(shell uname -m) LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/ucall.c lib/sparsebit.c LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c +LIBKVM_aarch64 = lib/aarch64/processor.c TEST_GEN_PROGS_x86_64 = x86_64/platform_info_test TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test @@ -12,7 +13,9 @@ TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test TEST_GEN_PROGS_x86_64 += x86_64/cr4_cpuid_sync_test TEST_GEN_PROGS_x86_64 += x86_64/state_test -TEST_GEN_PROGS_x86_64 += x86_64/dirty_log_test +TEST_GEN_PROGS_x86_64 += dirty_log_test + +TEST_GEN_PROGS_aarch64 += dirty_log_test TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M)) LIBKVM += $(LIBKVM_$(UNAME_M)) diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c new file mode 100644 index 000000000000..d90f1091f687 --- /dev/null +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -0,0 +1,315 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * KVM dirty page logging test + * + * Copyright (C) 2018, Red Hat, Inc. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" + +#define DEBUG printf + +#define VCPU_ID 1 + +/* The memory slot index to track dirty pages */ +#define TEST_MEM_SLOT_INDEX 1 + +/* + * GPA offset of the testing memory slot. Must be bigger than the + * default vm mem slot, which is DEFAULT_GUEST_PHY_PAGES. + */ +#define TEST_MEM_OFFSET (1ul << 30) /* 1G */ + +/* Size of the testing memory slot */ +#define TEST_MEM_PAGES (1ul << 18) /* 1G for 4K pages */ + +/* How many pages to dirty for each guest loop */ +#define TEST_PAGES_PER_LOOP 1024 + +/* How many host loops to run (one KVM_GET_DIRTY_LOG for each loop) */ +#define TEST_HOST_LOOP_N 32 + +/* Interval for each host loop (ms) */ +#define TEST_HOST_LOOP_INTERVAL 10 + +/* + * Guest/Host shared variables. Ensure addr_gva2hva() and/or + * sync_global_to/from_guest() are used when accessing from + * the host. READ/WRITE_ONCE() should also be used with anything + * that may change. + */ +static uint64_t host_page_size; +static uint64_t guest_page_size; +static uint64_t random_array[TEST_PAGES_PER_LOOP]; +static uint64_t iteration; + +/* + * Continuously write to the first 8 bytes of a random pages within + * the testing memory region. + */ +static void guest_code(void) +{ + int i; + + while (true) { + for (i = 0; i < TEST_PAGES_PER_LOOP; i++) { + uint64_t addr = TEST_MEM_OFFSET; + addr += (READ_ONCE(random_array[i]) % TEST_MEM_PAGES) + * guest_page_size; + addr &= ~(host_page_size - 1); + *(uint64_t *)addr = READ_ONCE(iteration); + } + + /* Tell the host that we need more random numbers */ + GUEST_SYNC(1); + } +} + +/* Host variables */ +static bool host_quit; + +/* Points to the test VM memory region on which we track dirty logs */ +static void *host_test_mem; +static uint64_t host_num_pages; + +/* For statistics only */ +static uint64_t host_dirty_count; +static uint64_t host_clear_count; +static uint64_t host_track_next_count; + +/* + * We use this bitmap to track some pages that should have its dirty + * bit set in the _next_ iteration. For example, if we detected the + * page value changed to current iteration but at the same time the + * page bit is cleared in the latest bitmap, then the system must + * report that write in the next get dirty log call. + */ +static unsigned long *host_bmap_track; + +static void generate_random_array(uint64_t *guest_array, uint64_t size) +{ + uint64_t i; + + for (i = 0; i < size; i++) + guest_array[i] = random(); +} + +static void *vcpu_worker(void *data) +{ + int ret; + struct kvm_vm *vm = data; + uint64_t *guest_array; + uint64_t pages_count = 0; + struct kvm_run *run; + struct ucall uc; + + run = vcpu_state(vm, VCPU_ID); + + guest_array = addr_gva2hva(vm, (vm_vaddr_t)random_array); + generate_random_array(guest_array, TEST_PAGES_PER_LOOP); + + while (!READ_ONCE(host_quit)) { + /* Let the guest dirty the random pages */ + ret = _vcpu_run(vm, VCPU_ID); + if (get_ucall(vm, VCPU_ID, &uc) == UCALL_SYNC) { + pages_count += TEST_PAGES_PER_LOOP; + generate_random_array(guest_array, TEST_PAGES_PER_LOOP); + } else { + TEST_ASSERT(false, + "Invalid guest sync status: " + "exit_reason=%s\n", + exit_reason_str(run->exit_reason)); + } + } + + DEBUG("Dirtied %"PRIu64" pages\n", pages_count); + + return NULL; +} + +static void vm_dirty_log_verify(unsigned long *bmap) +{ + uint64_t page; + uint64_t *value_ptr; + + for (page = 0; page < host_num_pages; page++) { + value_ptr = host_test_mem + page * host_page_size; + + /* If this is a special page that we were tracking... */ + if (test_and_clear_bit(page, host_bmap_track)) { + host_track_next_count++; + TEST_ASSERT(test_bit(page, bmap), + "Page %"PRIu64" should have its dirty bit " + "set in this iteration but it is missing", + page); + } + + if (test_bit(page, bmap)) { + host_dirty_count++; + /* + * If the bit is set, the value written onto + * the corresponding page should be either the + * previous iteration number or the current one. + */ + TEST_ASSERT(*value_ptr == iteration || + *value_ptr == iteration - 1, + "Set page %"PRIu64" value %"PRIu64 + " incorrect (iteration=%"PRIu64")", + page, *value_ptr, iteration); + } else { + host_clear_count++; + /* + * If cleared, the value written can be any + * value smaller or equals to the iteration + * number. Note that the value can be exactly + * (iteration-1) if that write can happen + * like this: + * + * (1) increase loop count to "iteration-1" + * (2) write to page P happens (with value + * "iteration-1") + * (3) get dirty log for "iteration-1"; we'll + * see that page P bit is set (dirtied), + * and not set the bit in host_bmap_track + * (4) increase loop count to "iteration" + * (which is current iteration) + * (5) get dirty log for current iteration, + * we'll see that page P is cleared, with + * value "iteration-1". + */ + TEST_ASSERT(*value_ptr <= iteration, + "Clear page %"PRIu64" value %"PRIu64 + " incorrect (iteration=%"PRIu64")", + page, *value_ptr, iteration); + if (*value_ptr == iteration) { + /* + * This page is _just_ modified; it + * should report its dirtyness in the + * next run + */ + set_bit(page, host_bmap_track); + } + } + } +} + +static void help(char *name) +{ + puts(""); + printf("usage: %s [-i iterations] [-I interval] [-h]\n", name); + puts(""); + printf(" -i: specify iteration counts (default: %"PRIu64")\n", + TEST_HOST_LOOP_N); + printf(" -I: specify interval in ms (default: %"PRIu64" ms)\n", + TEST_HOST_LOOP_INTERVAL); + puts(""); + exit(0); +} + +int main(int argc, char *argv[]) +{ + pthread_t vcpu_thread; + struct kvm_vm *vm; + unsigned long iterations = TEST_HOST_LOOP_N; + unsigned long interval = TEST_HOST_LOOP_INTERVAL; + unsigned long *bmap; + int opt; + + while ((opt = getopt(argc, argv, "hi:I:")) != -1) { + switch (opt) { + case 'i': + iterations = strtol(optarg, NULL, 10); + break; + case 'I': + interval = strtol(optarg, NULL, 10); + break; + case 'h': + default: + help(argv[0]); + break; + } + } + + TEST_ASSERT(iterations > 2, "Iterations must be greater than two"); + TEST_ASSERT(interval > 0, "Interval must be greater than zero"); + + DEBUG("Test iterations: %"PRIu64", interval: %"PRIu64" (ms)\n", + iterations, interval); + + srandom(time(0)); + + guest_page_size = 4096; + host_page_size = getpagesize(); + host_num_pages = (TEST_MEM_PAGES * guest_page_size) / host_page_size + + !!((TEST_MEM_PAGES * guest_page_size) % host_page_size); + + bmap = bitmap_alloc(host_num_pages); + host_bmap_track = bitmap_alloc(host_num_pages); + + vm = vm_create_default(VCPU_ID, TEST_MEM_PAGES, guest_code); + + /* Add an extra memory slot for testing dirty logging */ + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, + TEST_MEM_OFFSET, + TEST_MEM_SLOT_INDEX, + TEST_MEM_PAGES, + KVM_MEM_LOG_DIRTY_PAGES); + + /* Do 1:1 mapping for the dirty track memory slot */ + virt_map(vm, TEST_MEM_OFFSET, TEST_MEM_OFFSET, + TEST_MEM_PAGES * guest_page_size, 0); + + /* Cache the HVA pointer of the region */ + host_test_mem = addr_gpa2hva(vm, (vm_paddr_t)TEST_MEM_OFFSET); + +#ifdef __x86_64__ + vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); +#endif +#ifdef __aarch64__ + ucall_init(vm, UCALL_MMIO, NULL); +#endif + + /* Tell the guest about the page sizes */ + sync_global_to_guest(vm, host_page_size); + sync_global_to_guest(vm, guest_page_size); + + /* Start the iterations */ + iteration = 1; + sync_global_to_guest(vm, iteration); + + pthread_create(&vcpu_thread, NULL, vcpu_worker, vm); + + while (iteration < iterations) { + /* Give the vcpu thread some time to dirty some pages */ + usleep(interval * 1000); + kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap); + vm_dirty_log_verify(bmap); + iteration++; + sync_global_to_guest(vm, iteration); + } + + /* Tell the vcpu thread to quit */ + host_quit = true; + pthread_join(vcpu_thread, NULL); + + DEBUG("Total bits checked: dirty (%"PRIu64"), clear (%"PRIu64"), " + "track_next (%"PRIu64")\n", host_dirty_count, host_clear_count, + host_track_next_count); + + free(bmap); + free(host_bmap_track); + ucall_uninit(vm); + kvm_vm_free(vm); + + return 0; +} diff --git a/tools/testing/selftests/kvm/x86_64/dirty_log_test.c b/tools/testing/selftests/kvm/x86_64/dirty_log_test.c deleted file mode 100644 index 395a6e07d37c..000000000000 --- a/tools/testing/selftests/kvm/x86_64/dirty_log_test.c +++ /dev/null @@ -1,308 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * KVM dirty page logging test - * - * Copyright (C) 2018, Red Hat, Inc. - */ - -#include -#include -#include -#include -#include -#include -#include - -#include "test_util.h" -#include "kvm_util.h" -#include "processor.h" - -#define DEBUG printf - -#define VCPU_ID 1 -/* The memory slot index to track dirty pages */ -#define TEST_MEM_SLOT_INDEX 1 -/* - * GPA offset of the testing memory slot. Must be bigger than the - * default vm mem slot, which is DEFAULT_GUEST_PHY_PAGES. - */ -#define TEST_MEM_OFFSET (1ULL << 30) /* 1G */ -/* Size of the testing memory slot */ -#define TEST_MEM_PAGES (1ULL << 18) /* 1G for 4K pages */ -/* How many pages to dirty for each guest loop */ -#define TEST_PAGES_PER_LOOP 1024 -/* How many host loops to run (one KVM_GET_DIRTY_LOG for each loop) */ -#define TEST_HOST_LOOP_N 32 -/* Interval for each host loop (ms) */ -#define TEST_HOST_LOOP_INTERVAL 10 - -/* - * Guest variables. We use these variables to share data between host - * and guest. There are two copies of the variables, one in host memory - * (which is unused) and one in guest memory. When the host wants to - * access these variables, it needs to call addr_gva2hva() to access the - * guest copy. - */ -uint64_t guest_random_array[TEST_PAGES_PER_LOOP]; -uint64_t guest_iteration; -uint64_t guest_page_size; - -/* - * Writes to the first byte of a random page within the testing memory - * region continuously. - */ -void guest_code(void) -{ - int i = 0; - uint64_t volatile *array = guest_random_array; - uint64_t volatile *guest_addr; - - while (true) { - for (i = 0; i < TEST_PAGES_PER_LOOP; i++) { - /* - * Write to the first 8 bytes of a random page - * on the testing memory region. - */ - guest_addr = (uint64_t *) - (TEST_MEM_OFFSET + - (array[i] % TEST_MEM_PAGES) * guest_page_size); - *guest_addr = guest_iteration; - } - /* Tell the host that we need more random numbers */ - GUEST_SYNC(1); - } -} - -/* - * Host variables. These variables should only be used by the host - * rather than the guest. - */ -bool host_quit; - -/* Points to the test VM memory region on which we track dirty logs */ -void *host_test_mem; - -/* For statistics only */ -uint64_t host_dirty_count; -uint64_t host_clear_count; -uint64_t host_track_next_count; - -/* - * We use this bitmap to track some pages that should have its dirty - * bit set in the _next_ iteration. For example, if we detected the - * page value changed to current iteration but at the same time the - * page bit is cleared in the latest bitmap, then the system must - * report that write in the next get dirty log call. - */ -unsigned long *host_bmap_track; - -void generate_random_array(uint64_t *guest_array, uint64_t size) -{ - uint64_t i; - - for (i = 0; i < size; i++) { - guest_array[i] = random(); - } -} - -void *vcpu_worker(void *data) -{ - int ret; - uint64_t loops, *guest_array, pages_count = 0; - struct kvm_vm *vm = data; - struct kvm_run *run; - struct ucall uc; - - run = vcpu_state(vm, VCPU_ID); - - /* Retrieve the guest random array pointer and cache it */ - guest_array = addr_gva2hva(vm, (vm_vaddr_t)guest_random_array); - - DEBUG("VCPU starts\n"); - - generate_random_array(guest_array, TEST_PAGES_PER_LOOP); - - while (!READ_ONCE(host_quit)) { - /* Let the guest to dirty these random pages */ - ret = _vcpu_run(vm, VCPU_ID); - if (run->exit_reason == KVM_EXIT_IO && - get_ucall(vm, VCPU_ID, &uc) == UCALL_SYNC) { - pages_count += TEST_PAGES_PER_LOOP; - generate_random_array(guest_array, TEST_PAGES_PER_LOOP); - } else { - TEST_ASSERT(false, - "Invalid guest sync status: " - "exit_reason=%s\n", - exit_reason_str(run->exit_reason)); - } - } - - DEBUG("VCPU exits, dirtied %"PRIu64" pages\n", pages_count); - - return NULL; -} - -void vm_dirty_log_verify(unsigned long *bmap, uint64_t iteration) -{ - uint64_t page; - uint64_t volatile *value_ptr; - - for (page = 0; page < TEST_MEM_PAGES; page++) { - value_ptr = host_test_mem + page * getpagesize(); - - /* If this is a special page that we were tracking... */ - if (test_and_clear_bit(page, host_bmap_track)) { - host_track_next_count++; - TEST_ASSERT(test_bit(page, bmap), - "Page %"PRIu64" should have its dirty bit " - "set in this iteration but it is missing", - page); - } - - if (test_bit(page, bmap)) { - host_dirty_count++; - /* - * If the bit is set, the value written onto - * the corresponding page should be either the - * previous iteration number or the current one. - */ - TEST_ASSERT(*value_ptr == iteration || - *value_ptr == iteration - 1, - "Set page %"PRIu64" value %"PRIu64 - " incorrect (iteration=%"PRIu64")", - page, *value_ptr, iteration); - } else { - host_clear_count++; - /* - * If cleared, the value written can be any - * value smaller or equals to the iteration - * number. Note that the value can be exactly - * (iteration-1) if that write can happen - * like this: - * - * (1) increase loop count to "iteration-1" - * (2) write to page P happens (with value - * "iteration-1") - * (3) get dirty log for "iteration-1"; we'll - * see that page P bit is set (dirtied), - * and not set the bit in host_bmap_track - * (4) increase loop count to "iteration" - * (which is current iteration) - * (5) get dirty log for current iteration, - * we'll see that page P is cleared, with - * value "iteration-1". - */ - TEST_ASSERT(*value_ptr <= iteration, - "Clear page %"PRIu64" value %"PRIu64 - " incorrect (iteration=%"PRIu64")", - page, *value_ptr, iteration); - if (*value_ptr == iteration) { - /* - * This page is _just_ modified; it - * should report its dirtyness in the - * next run - */ - set_bit(page, host_bmap_track); - } - } - } -} - -void help(char *name) -{ - puts(""); - printf("usage: %s [-i iterations] [-I interval] [-h]\n", name); - puts(""); - printf(" -i: specify iteration counts (default: %"PRIu64")\n", - TEST_HOST_LOOP_N); - printf(" -I: specify interval in ms (default: %"PRIu64" ms)\n", - TEST_HOST_LOOP_INTERVAL); - puts(""); - exit(0); -} - -int main(int argc, char *argv[]) -{ - pthread_t vcpu_thread; - struct kvm_vm *vm; - uint64_t volatile *psize, *iteration; - unsigned long *bmap, iterations = TEST_HOST_LOOP_N, - interval = TEST_HOST_LOOP_INTERVAL; - int opt; - - while ((opt = getopt(argc, argv, "hi:I:")) != -1) { - switch (opt) { - case 'i': - iterations = strtol(optarg, NULL, 10); - break; - case 'I': - interval = strtol(optarg, NULL, 10); - break; - case 'h': - default: - help(argv[0]); - break; - } - } - - TEST_ASSERT(iterations > 2, "Iteration must be bigger than zero\n"); - TEST_ASSERT(interval > 0, "Interval must be bigger than zero"); - - DEBUG("Test iterations: %"PRIu64", interval: %"PRIu64" (ms)\n", - iterations, interval); - - srandom(time(0)); - - bmap = bitmap_alloc(TEST_MEM_PAGES); - host_bmap_track = bitmap_alloc(TEST_MEM_PAGES); - - vm = vm_create_default(VCPU_ID, TEST_MEM_PAGES, guest_code); - - /* Add an extra memory slot for testing dirty logging */ - vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, - TEST_MEM_OFFSET, - TEST_MEM_SLOT_INDEX, - TEST_MEM_PAGES, - KVM_MEM_LOG_DIRTY_PAGES); - /* Cache the HVA pointer of the region */ - host_test_mem = addr_gpa2hva(vm, (vm_paddr_t)TEST_MEM_OFFSET); - - /* Do 1:1 mapping for the dirty track memory slot */ - virt_map(vm, TEST_MEM_OFFSET, TEST_MEM_OFFSET, - TEST_MEM_PAGES * getpagesize(), 0); - - vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); - - /* Tell the guest about the page size on the system */ - psize = addr_gva2hva(vm, (vm_vaddr_t)&guest_page_size); - *psize = getpagesize(); - - /* Start the iterations */ - iteration = addr_gva2hva(vm, (vm_vaddr_t)&guest_iteration); - *iteration = 1; - - /* Start dirtying pages */ - pthread_create(&vcpu_thread, NULL, vcpu_worker, vm); - - while (*iteration < iterations) { - /* Give the vcpu thread some time to dirty some pages */ - usleep(interval * 1000); - kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap); - vm_dirty_log_verify(bmap, *iteration); - (*iteration)++; - } - - /* Tell the vcpu thread to quit */ - host_quit = true; - pthread_join(vcpu_thread, NULL); - - DEBUG("Total bits checked: dirty (%"PRIu64"), clear (%"PRIu64"), " - "track_next (%"PRIu64")\n", host_dirty_count, host_clear_count, - host_track_next_count); - - free(bmap); - free(host_bmap_track); - kvm_vm_free(vm); - - return 0; -} -- cgit v1.2.3 From e1b376f140ad9cbb1e1976f347377b8ef08a5bc9 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 18 Sep 2018 19:54:34 +0200 Subject: kvm: selftests: dirty_log_test: also test 64K pages on aarch64 Signed-off-by: Andrew Jones Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/dirty_log_test.c | 183 ++++++++++++++++++++------- 1 file changed, 137 insertions(+), 46 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index d90f1091f687..3afc7d607a2e 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -5,6 +5,8 @@ * Copyright (C) 2018, Red Hat, Inc. */ +#define _GNU_SOURCE /* for program_invocation_name */ + #include #include #include @@ -30,9 +32,6 @@ */ #define TEST_MEM_OFFSET (1ul << 30) /* 1G */ -/* Size of the testing memory slot */ -#define TEST_MEM_PAGES (1ul << 18) /* 1G for 4K pages */ - /* How many pages to dirty for each guest loop */ #define TEST_PAGES_PER_LOOP 1024 @@ -50,6 +49,7 @@ */ static uint64_t host_page_size; static uint64_t guest_page_size; +static uint64_t guest_num_pages; static uint64_t random_array[TEST_PAGES_PER_LOOP]; static uint64_t iteration; @@ -64,7 +64,7 @@ static void guest_code(void) while (true) { for (i = 0; i < TEST_PAGES_PER_LOOP; i++) { uint64_t addr = TEST_MEM_OFFSET; - addr += (READ_ONCE(random_array[i]) % TEST_MEM_PAGES) + addr += (READ_ONCE(random_array[i]) % guest_num_pages) * guest_page_size; addr &= ~(host_page_size - 1); *(uint64_t *)addr = READ_ONCE(iteration); @@ -141,8 +141,10 @@ static void vm_dirty_log_verify(unsigned long *bmap) { uint64_t page; uint64_t *value_ptr; + uint64_t step = host_page_size >= guest_page_size ? 1 : + guest_page_size / host_page_size; - for (page = 0; page < host_num_pages; page++) { + for (page = 0; page < host_num_pages; page += step) { value_ptr = host_test_mem + page * host_page_size; /* If this is a special page that we were tracking... */ @@ -203,71 +205,64 @@ static void vm_dirty_log_verify(unsigned long *bmap) } } -static void help(char *name) +static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid, + uint64_t extra_mem_pages, void *guest_code) { - puts(""); - printf("usage: %s [-i iterations] [-I interval] [-h]\n", name); - puts(""); - printf(" -i: specify iteration counts (default: %"PRIu64")\n", - TEST_HOST_LOOP_N); - printf(" -I: specify interval in ms (default: %"PRIu64" ms)\n", - TEST_HOST_LOOP_INTERVAL); - puts(""); - exit(0); + struct kvm_vm *vm; + uint64_t extra_pg_pages = extra_mem_pages / 512 * 2; + + vm = vm_create(mode, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR); + kvm_vm_elf_load(vm, program_invocation_name, 0, 0); +#ifdef __x86_64__ + vm_create_irqchip(vm); +#endif + vm_vcpu_add_default(vm, vcpuid, guest_code); + return vm; } -int main(int argc, char *argv[]) +static void run_test(enum vm_guest_mode mode, unsigned long iterations, + unsigned long interval) { + unsigned int guest_page_shift; pthread_t vcpu_thread; struct kvm_vm *vm; - unsigned long iterations = TEST_HOST_LOOP_N; - unsigned long interval = TEST_HOST_LOOP_INTERVAL; unsigned long *bmap; - int opt; - while ((opt = getopt(argc, argv, "hi:I:")) != -1) { - switch (opt) { - case 'i': - iterations = strtol(optarg, NULL, 10); - break; - case 'I': - interval = strtol(optarg, NULL, 10); - break; - case 'h': - default: - help(argv[0]); - break; - } + switch (mode) { + case VM_MODE_P52V48_4K: + guest_page_shift = 12; + break; + case VM_MODE_P52V48_64K: + guest_page_shift = 16; + break; + default: + TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", mode); } - TEST_ASSERT(iterations > 2, "Iterations must be greater than two"); - TEST_ASSERT(interval > 0, "Interval must be greater than zero"); - - DEBUG("Test iterations: %"PRIu64", interval: %"PRIu64" (ms)\n", - iterations, interval); - - srandom(time(0)); + DEBUG("Testing guest mode: %s\n", vm_guest_mode_string(mode)); - guest_page_size = 4096; + guest_page_size = (1ul << guest_page_shift); + /* 1G of guest page sized pages */ + guest_num_pages = (1ul << (30 - guest_page_shift)); host_page_size = getpagesize(); - host_num_pages = (TEST_MEM_PAGES * guest_page_size) / host_page_size + - !!((TEST_MEM_PAGES * guest_page_size) % host_page_size); + host_num_pages = (guest_num_pages * guest_page_size) / host_page_size + + !!((guest_num_pages * guest_page_size) % host_page_size); bmap = bitmap_alloc(host_num_pages); host_bmap_track = bitmap_alloc(host_num_pages); - vm = vm_create_default(VCPU_ID, TEST_MEM_PAGES, guest_code); + vm = create_vm(mode, VCPU_ID, guest_num_pages, guest_code); /* Add an extra memory slot for testing dirty logging */ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, TEST_MEM_OFFSET, TEST_MEM_SLOT_INDEX, - TEST_MEM_PAGES, + guest_num_pages, KVM_MEM_LOG_DIRTY_PAGES); /* Do 1:1 mapping for the dirty track memory slot */ virt_map(vm, TEST_MEM_OFFSET, TEST_MEM_OFFSET, - TEST_MEM_PAGES * guest_page_size, 0); + guest_num_pages * guest_page_size, 0); /* Cache the HVA pointer of the region */ host_test_mem = addr_gpa2hva(vm, (vm_paddr_t)TEST_MEM_OFFSET); @@ -279,13 +274,18 @@ int main(int argc, char *argv[]) ucall_init(vm, UCALL_MMIO, NULL); #endif - /* Tell the guest about the page sizes */ + /* Export the shared variables to the guest */ sync_global_to_guest(vm, host_page_size); sync_global_to_guest(vm, guest_page_size); + sync_global_to_guest(vm, guest_num_pages); /* Start the iterations */ iteration = 1; sync_global_to_guest(vm, iteration); + host_quit = false; + host_dirty_count = 0; + host_clear_count = 0; + host_track_next_count = 0; pthread_create(&vcpu_thread, NULL, vcpu_worker, vm); @@ -310,6 +310,97 @@ int main(int argc, char *argv[]) free(host_bmap_track); ucall_uninit(vm); kvm_vm_free(vm); +} + +static struct vm_guest_modes { + enum vm_guest_mode mode; + bool supported; + bool enabled; +} vm_guest_modes[NUM_VM_MODES] = { + { VM_MODE_P52V48_4K, 1, 1, }, +#ifdef __aarch64__ + { VM_MODE_P52V48_64K, 1, 1, }, +#else + { VM_MODE_P52V48_64K, 0, 0, }, +#endif +}; + +static void help(char *name) +{ + int i; + + puts(""); + printf("usage: %s [-h] [-i iterations] [-I interval] [-m mode]\n", name); + puts(""); + printf(" -i: specify iteration counts (default: %"PRIu64")\n", + TEST_HOST_LOOP_N); + printf(" -I: specify interval in ms (default: %"PRIu64" ms)\n", + TEST_HOST_LOOP_INTERVAL); + printf(" -m: specify the guest mode ID to test " + "(default: test all supported modes)\n" + " This option may be used multiple times.\n" + " Guest mode IDs:\n"); + for (i = 0; i < NUM_VM_MODES; ++i) { + printf(" %d: %s%s\n", + vm_guest_modes[i].mode, + vm_guest_mode_string(vm_guest_modes[i].mode), + vm_guest_modes[i].supported ? " (supported)" : ""); + } + puts(""); + exit(0); +} + +int main(int argc, char *argv[]) +{ + unsigned long iterations = TEST_HOST_LOOP_N; + unsigned long interval = TEST_HOST_LOOP_INTERVAL; + bool mode_selected = false; + unsigned int mode; + int opt, i; + + while ((opt = getopt(argc, argv, "hi:I:m:")) != -1) { + switch (opt) { + case 'i': + iterations = strtol(optarg, NULL, 10); + break; + case 'I': + interval = strtol(optarg, NULL, 10); + break; + case 'm': + if (!mode_selected) { + for (i = 0; i < NUM_VM_MODES; ++i) + vm_guest_modes[i].enabled = 0; + mode_selected = true; + } + mode = strtoul(optarg, NULL, 10); + TEST_ASSERT(mode < NUM_VM_MODES, + "Guest mode ID %d too big", mode); + vm_guest_modes[mode].enabled = 1; + break; + case 'h': + default: + help(argv[0]); + break; + } + } + + TEST_ASSERT(iterations > 2, "Iterations must be greater than two"); + TEST_ASSERT(interval > 0, "Interval must be greater than zero"); + + DEBUG("Test iterations: %"PRIu64", interval: %"PRIu64" (ms)\n", + iterations, interval); + + srandom(time(0)); + + for (i = 0; i < NUM_VM_MODES; ++i) { + if (!vm_guest_modes[i].enabled) + continue; + TEST_ASSERT(vm_guest_modes[i].supported, + "Guest mode ID %d (%s) not supported.", + vm_guest_modes[i].mode, + vm_guest_mode_string(vm_guest_modes[i].mode)); + run_test(vm_guest_modes[i].mode, iterations, interval); + } return 0; } -- cgit v1.2.3 From e28934e661c1d9425f1cc41b405dcd8626507206 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 18 Sep 2018 19:54:35 +0200 Subject: kvm: selftests: stop lying to aarch64 tests about PA-bits Let's add the 40 PA-bit versions of the VM modes, that AArch64 should have been using, so we can extend the dirty log test without breaking things. Signed-off-by: Andrew Jones Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/dirty_log_test.c | 13 ++++++++++--- tools/testing/selftests/kvm/include/kvm_util.h | 2 ++ tools/testing/selftests/kvm/lib/aarch64/processor.c | 8 ++++++++ tools/testing/selftests/kvm/lib/kvm_util.c | 16 ++++++++++++++++ 4 files changed, 36 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index 3afc7d607a2e..61396882ad4e 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -230,9 +230,11 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, switch (mode) { case VM_MODE_P52V48_4K: + case VM_MODE_P40V48_4K: guest_page_shift = 12; break; case VM_MODE_P52V48_64K: + case VM_MODE_P40V48_64K: guest_page_shift = 16; break; default: @@ -317,11 +319,16 @@ static struct vm_guest_modes { bool supported; bool enabled; } vm_guest_modes[NUM_VM_MODES] = { +#if defined(__x86_64__) { VM_MODE_P52V48_4K, 1, 1, }, -#ifdef __aarch64__ - { VM_MODE_P52V48_64K, 1, 1, }, -#else { VM_MODE_P52V48_64K, 0, 0, }, + { VM_MODE_P40V48_4K, 0, 0, }, + { VM_MODE_P40V48_64K, 0, 0, }, +#elif defined(__aarch64__) + { VM_MODE_P52V48_4K, 0, 0, }, + { VM_MODE_P52V48_64K, 0, 0, }, + { VM_MODE_P40V48_4K, 1, 1, }, + { VM_MODE_P40V48_64K, 1, 1, }, #endif }; diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 88df26c41845..a4e59e3b4826 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -36,6 +36,8 @@ typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */ enum vm_guest_mode { VM_MODE_P52V48_4K, VM_MODE_P52V48_64K, + VM_MODE_P40V48_4K, + VM_MODE_P40V48_64K, NUM_VM_MODES, }; diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c index b1dfc0d4b68e..b6022e2f116e 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/processor.c +++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c @@ -275,6 +275,14 @@ void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_memslot) tcr_el1 |= 1ul << 14; /* TG0 = 64KB */ tcr_el1 |= 6ul << 32; /* IPS = 52 bits */ break; + case VM_MODE_P40V48_4K: + tcr_el1 |= 0ul << 14; /* TG0 = 4KB */ + tcr_el1 |= 2ul << 32; /* IPS = 40 bits */ + break; + case VM_MODE_P40V48_64K: + tcr_el1 |= 1ul << 14; /* TG0 = 64KB */ + tcr_el1 |= 2ul << 32; /* IPS = 40 bits */ + break; default: TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", vm->mode); } diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index b37e52f4daad..8c06da4f03db 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -99,6 +99,8 @@ static void vm_open(struct kvm_vm *vm, int perm) const char * const vm_guest_mode_string[] = { "PA-bits:52, VA-bits:48, 4K pages", "PA-bits:52, VA-bits:48, 64K pages", + "PA-bits:40, VA-bits:48, 4K pages", + "PA-bits:40, VA-bits:48, 64K pages", }; /* @@ -146,6 +148,20 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) vm->page_shift = 16; vm->va_bits = 48; break; + case VM_MODE_P40V48_4K: + vm->pgtable_levels = 4; + vm->pa_bits = 40; + vm->va_bits = 48; + vm->page_size = 0x1000; + vm->page_shift = 12; + break; + case VM_MODE_P40V48_64K: + vm->pgtable_levels = 3; + vm->pa_bits = 40; + vm->va_bits = 48; + vm->page_size = 0x10000; + vm->page_shift = 16; + break; default: TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", mode); } -- cgit v1.2.3 From 5b8ee8792f6bda46978e6e86fda4650bcbae07ab Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 18 Sep 2018 19:54:36 +0200 Subject: kvm: selftests: support high GPAs in dirty_log_test Signed-off-by: Andrew Jones Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/dirty_log_test.c | 65 +++++++++++++++++++++------- 1 file changed, 50 insertions(+), 15 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index 61396882ad4e..d59820cc2d39 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -26,11 +26,8 @@ /* The memory slot index to track dirty pages */ #define TEST_MEM_SLOT_INDEX 1 -/* - * GPA offset of the testing memory slot. Must be bigger than the - * default vm mem slot, which is DEFAULT_GUEST_PHY_PAGES. - */ -#define TEST_MEM_OFFSET (1ul << 30) /* 1G */ +/* Default guest test memory offset, 1G */ +#define DEFAULT_GUEST_TEST_MEM 0x40000000 /* How many pages to dirty for each guest loop */ #define TEST_PAGES_PER_LOOP 1024 @@ -53,6 +50,12 @@ static uint64_t guest_num_pages; static uint64_t random_array[TEST_PAGES_PER_LOOP]; static uint64_t iteration; +/* + * GPA offset of the testing memory slot. Must be bigger than + * DEFAULT_GUEST_PHY_PAGES. + */ +static uint64_t guest_test_mem = DEFAULT_GUEST_TEST_MEM; + /* * Continuously write to the first 8 bytes of a random pages within * the testing memory region. @@ -63,7 +66,7 @@ static void guest_code(void) while (true) { for (i = 0; i < TEST_PAGES_PER_LOOP; i++) { - uint64_t addr = TEST_MEM_OFFSET; + uint64_t addr = guest_test_mem; addr += (READ_ONCE(random_array[i]) % guest_num_pages) * guest_page_size; addr &= ~(host_page_size - 1); @@ -221,20 +224,29 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid, } static void run_test(enum vm_guest_mode mode, unsigned long iterations, - unsigned long interval) + unsigned long interval, bool top_offset) { - unsigned int guest_page_shift; + unsigned int guest_pa_bits, guest_page_shift; pthread_t vcpu_thread; struct kvm_vm *vm; + uint64_t max_gfn; unsigned long *bmap; switch (mode) { case VM_MODE_P52V48_4K: - case VM_MODE_P40V48_4K: + guest_pa_bits = 52; guest_page_shift = 12; break; case VM_MODE_P52V48_64K: + guest_pa_bits = 52; + guest_page_shift = 16; + break; + case VM_MODE_P40V48_4K: + guest_pa_bits = 40; + guest_page_shift = 12; + break; case VM_MODE_P40V48_64K: + guest_pa_bits = 40; guest_page_shift = 16; break; default: @@ -243,6 +255,7 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, DEBUG("Testing guest mode: %s\n", vm_guest_mode_string(mode)); + max_gfn = (1ul << (guest_pa_bits - guest_page_shift)) - 1; guest_page_size = (1ul << guest_page_shift); /* 1G of guest page sized pages */ guest_num_pages = (1ul << (30 - guest_page_shift)); @@ -250,6 +263,13 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, host_num_pages = (guest_num_pages * guest_page_size) / host_page_size + !!((guest_num_pages * guest_page_size) % host_page_size); + if (top_offset) { + guest_test_mem = (max_gfn - guest_num_pages) * guest_page_size; + guest_test_mem &= ~(host_page_size - 1); + } + + DEBUG("guest test mem offset: 0x%lx\n", guest_test_mem); + bmap = bitmap_alloc(host_num_pages); host_bmap_track = bitmap_alloc(host_num_pages); @@ -257,17 +277,17 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, /* Add an extra memory slot for testing dirty logging */ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, - TEST_MEM_OFFSET, + guest_test_mem, TEST_MEM_SLOT_INDEX, guest_num_pages, KVM_MEM_LOG_DIRTY_PAGES); /* Do 1:1 mapping for the dirty track memory slot */ - virt_map(vm, TEST_MEM_OFFSET, TEST_MEM_OFFSET, + virt_map(vm, guest_test_mem, guest_test_mem, guest_num_pages * guest_page_size, 0); /* Cache the HVA pointer of the region */ - host_test_mem = addr_gpa2hva(vm, (vm_paddr_t)TEST_MEM_OFFSET); + host_test_mem = addr_gpa2hva(vm, (vm_paddr_t)guest_test_mem); #ifdef __x86_64__ vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); @@ -279,6 +299,7 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, /* Export the shared variables to the guest */ sync_global_to_guest(vm, host_page_size); sync_global_to_guest(vm, guest_page_size); + sync_global_to_guest(vm, guest_test_mem); sync_global_to_guest(vm, guest_num_pages); /* Start the iterations */ @@ -337,12 +358,17 @@ static void help(char *name) int i; puts(""); - printf("usage: %s [-h] [-i iterations] [-I interval] [-m mode]\n", name); + printf("usage: %s [-h] [-i iterations] [-I interval] " + "[-o offset] [-t] [-m mode]\n", name); puts(""); printf(" -i: specify iteration counts (default: %"PRIu64")\n", TEST_HOST_LOOP_N); printf(" -I: specify interval in ms (default: %"PRIu64" ms)\n", TEST_HOST_LOOP_INTERVAL); + printf(" -o: guest test memory offset (default: 0x%lx)\n", + DEFAULT_GUEST_TEST_MEM); + printf(" -t: map guest test memory at the top of the allowed " + "physical address range\n"); printf(" -m: specify the guest mode ID to test " "(default: test all supported modes)\n" " This option may be used multiple times.\n" @@ -362,10 +388,11 @@ int main(int argc, char *argv[]) unsigned long iterations = TEST_HOST_LOOP_N; unsigned long interval = TEST_HOST_LOOP_INTERVAL; bool mode_selected = false; + bool top_offset = false; unsigned int mode; int opt, i; - while ((opt = getopt(argc, argv, "hi:I:m:")) != -1) { + while ((opt = getopt(argc, argv, "hi:I:o:tm:")) != -1) { switch (opt) { case 'i': iterations = strtol(optarg, NULL, 10); @@ -373,6 +400,12 @@ int main(int argc, char *argv[]) case 'I': interval = strtol(optarg, NULL, 10); break; + case 'o': + guest_test_mem = strtoull(optarg, NULL, 0); + break; + case 't': + top_offset = true; + break; case 'm': if (!mode_selected) { for (i = 0; i < NUM_VM_MODES; ++i) @@ -393,6 +426,8 @@ int main(int argc, char *argv[]) TEST_ASSERT(iterations > 2, "Iterations must be greater than two"); TEST_ASSERT(interval > 0, "Interval must be greater than zero"); + TEST_ASSERT(!top_offset || guest_test_mem == DEFAULT_GUEST_TEST_MEM, + "Cannot use both -o [offset] and -t at the same time"); DEBUG("Test iterations: %"PRIu64", interval: %"PRIu64" (ms)\n", iterations, interval); @@ -406,7 +441,7 @@ int main(int argc, char *argv[]) "Guest mode ID %d (%s) not supported.", vm_guest_modes[i].mode, vm_guest_mode_string(vm_guest_modes[i].mode)); - run_test(vm_guest_modes[i].mode, iterations, interval); + run_test(vm_guest_modes[i].mode, iterations, interval, top_offset); } return 0; -- cgit v1.2.3 From 1e7ecd1b3d21a6302f3ee4a3720f682eb2467a3c Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 16 Oct 2018 18:50:08 +0200 Subject: KVM: selftests: state_test: test bare VMXON migration Split prepare_for_vmx_operation() into prepare_for_vmx_operation() and load_vmcs() so we can inject GUEST_SYNC() in between. Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/include/x86_64/vmx.h | 1 + tools/testing/selftests/kvm/lib/x86_64/vmx.c | 5 +++++ tools/testing/selftests/kvm/x86_64/state_test.c | 22 ++++++++++++---------- .../selftests/kvm/x86_64/vmx_tsc_adjust_test.c | 1 + 4 files changed, 19 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/include/x86_64/vmx.h b/tools/testing/selftests/kvm/include/x86_64/vmx.h index 12ebd836f7ef..4bbee8560292 100644 --- a/tools/testing/selftests/kvm/include/x86_64/vmx.h +++ b/tools/testing/selftests/kvm/include/x86_64/vmx.h @@ -548,5 +548,6 @@ struct vmx_pages { struct vmx_pages *vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva); bool prepare_for_vmx_operation(struct vmx_pages *vmx); void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp); +bool load_vmcs(struct vmx_pages *vmx); #endif /* SELFTEST_KVM_VMX_H */ diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c index d7c401472247..cc356da9b3d8 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c +++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c @@ -107,6 +107,11 @@ bool prepare_for_vmx_operation(struct vmx_pages *vmx) if (vmxon(vmx->vmxon_gpa)) return false; + return true; +} + +bool load_vmcs(struct vmx_pages *vmx) +{ /* Load a VMCS. */ *(uint32_t *)(vmx->vmcs) = vmcs_revision(); if (vmclear(vmx->vmcs_gpa)) diff --git a/tools/testing/selftests/kvm/x86_64/state_test.c b/tools/testing/selftests/kvm/x86_64/state_test.c index 43df194a7c1e..03da41f0f736 100644 --- a/tools/testing/selftests/kvm/x86_64/state_test.c +++ b/tools/testing/selftests/kvm/x86_64/state_test.c @@ -26,20 +26,20 @@ static bool have_nested_state; void l2_guest_code(void) { - GUEST_SYNC(5); + GUEST_SYNC(6); /* Exit to L1 */ vmcall(); /* L1 has now set up a shadow VMCS for us. */ GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee); - GUEST_SYNC(9); + GUEST_SYNC(10); GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee); GUEST_ASSERT(!vmwrite(GUEST_RIP, 0xc0fffee)); - GUEST_SYNC(10); + GUEST_SYNC(11); GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0fffee); GUEST_ASSERT(!vmwrite(GUEST_RIP, 0xc0ffffee)); - GUEST_SYNC(11); + GUEST_SYNC(12); /* Done, exit to L1 and never come back. */ vmcall(); @@ -52,15 +52,17 @@ void l1_guest_code(struct vmx_pages *vmx_pages) GUEST_ASSERT(vmx_pages->vmcs_gpa); GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); + GUEST_SYNC(3); + GUEST_ASSERT(load_vmcs(vmx_pages)); GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa); - GUEST_SYNC(3); + GUEST_SYNC(4); GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa); prepare_vmcs(vmx_pages, l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]); - GUEST_SYNC(4); + GUEST_SYNC(5); GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa); GUEST_ASSERT(!vmlaunch()); GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa); @@ -72,7 +74,7 @@ void l1_guest_code(struct vmx_pages *vmx_pages) GUEST_ASSERT(!vmresume()); GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); - GUEST_SYNC(6); + GUEST_SYNC(7); GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); GUEST_ASSERT(!vmresume()); @@ -85,12 +87,12 @@ void l1_guest_code(struct vmx_pages *vmx_pages) GUEST_ASSERT(!vmptrld(vmx_pages->shadow_vmcs_gpa)); GUEST_ASSERT(vmlaunch()); - GUEST_SYNC(7); + GUEST_SYNC(8); GUEST_ASSERT(vmlaunch()); GUEST_ASSERT(vmresume()); vmwrite(GUEST_RIP, 0xc0ffee); - GUEST_SYNC(8); + GUEST_SYNC(9); GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee); GUEST_ASSERT(!vmptrld(vmx_pages->vmcs_gpa)); @@ -101,7 +103,7 @@ void l1_guest_code(struct vmx_pages *vmx_pages) GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffffee); GUEST_ASSERT(vmlaunch()); GUEST_ASSERT(vmresume()); - GUEST_SYNC(12); + GUEST_SYNC(13); GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffffee); GUEST_ASSERT(vmlaunch()); GUEST_ASSERT(vmresume()); diff --git a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c index 38a91a5f04ac..18fa64db0d7a 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c @@ -94,6 +94,7 @@ static void l1_guest_code(struct vmx_pages *vmx_pages) check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE); GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); + GUEST_ASSERT(load_vmcs(vmx_pages)); /* Prepare the VMCS for L2 execution. */ prepare_vmcs(vmx_pages, l2_guest_code, -- cgit v1.2.3 From c939989d74e27f44229a51d8fe96b5c297965bfa Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 16 Oct 2018 18:50:10 +0200 Subject: tools/headers: update kvm.h Pick up the latest kvm.h definitions. Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini --- tools/include/uapi/linux/kvm.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'tools') diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 07548de5c988..2875ce85b322 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -719,6 +719,7 @@ struct kvm_ppc_one_seg_page_size { #define KVM_PPC_PAGE_SIZES_REAL 0x00000001 #define KVM_PPC_1T_SEGMENTS 0x00000002 +#define KVM_PPC_NO_HASH 0x00000004 struct kvm_ppc_smmu_info { __u64 flags; @@ -952,6 +953,11 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_S390_HPAGE_1M 156 #define KVM_CAP_NESTED_STATE 157 #define KVM_CAP_ARM_INJECT_SERROR_ESR 158 +#define KVM_CAP_MSR_PLATFORM_INFO 159 +#define KVM_CAP_PPC_NESTED_HV 160 +#define KVM_CAP_HYPERV_SEND_IPI 161 +#define KVM_CAP_COALESCED_PIO 162 +#define KVM_CAP_HYPERV_ENLIGHTENED_VMCS 163 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From 18178ff862170ee4c9fd7450173c6fba9e3c4c3a Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 16 Oct 2018 18:50:11 +0200 Subject: KVM: selftests: add Enlightened VMCS test Modify test library and add eVMCS test. This includes nVMX save/restore testing. Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/Makefile | 1 + tools/testing/selftests/kvm/include/evmcs.h | 1098 ++++++++++++++++++++++ tools/testing/selftests/kvm/include/x86_64/vmx.h | 28 + tools/testing/selftests/kvm/lib/x86_64/vmx.c | 48 +- tools/testing/selftests/kvm/x86_64/evmcs_test.c | 160 ++++ 5 files changed, 1323 insertions(+), 12 deletions(-) create mode 100644 tools/testing/selftests/kvm/include/evmcs.h create mode 100644 tools/testing/selftests/kvm/x86_64/evmcs_test.c (limited to 'tools') diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 5e23f97ec46f..01a219229238 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -13,6 +13,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test TEST_GEN_PROGS_x86_64 += x86_64/cr4_cpuid_sync_test TEST_GEN_PROGS_x86_64 += x86_64/state_test +TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test TEST_GEN_PROGS_x86_64 += dirty_log_test TEST_GEN_PROGS_aarch64 += dirty_log_test diff --git a/tools/testing/selftests/kvm/include/evmcs.h b/tools/testing/selftests/kvm/include/evmcs.h new file mode 100644 index 000000000000..4059014d93ea --- /dev/null +++ b/tools/testing/selftests/kvm/include/evmcs.h @@ -0,0 +1,1098 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * tools/testing/selftests/kvm/include/vmx.h + * + * Copyright (C) 2018, Red Hat, Inc. + * + */ + +#ifndef SELFTEST_KVM_EVMCS_H +#define SELFTEST_KVM_EVMCS_H + +#include +#include "vmx.h" + +#define u16 uint16_t +#define u32 uint32_t +#define u64 uint64_t + +extern bool enable_evmcs; + +struct hv_vp_assist_page { + __u32 apic_assist; + __u32 reserved; + __u64 vtl_control[2]; + __u64 nested_enlightenments_control[2]; + __u32 enlighten_vmentry; + __u64 current_nested_vmcs; +}; + +struct hv_enlightened_vmcs { + u32 revision_id; + u32 abort; + + u16 host_es_selector; + u16 host_cs_selector; + u16 host_ss_selector; + u16 host_ds_selector; + u16 host_fs_selector; + u16 host_gs_selector; + u16 host_tr_selector; + + u64 host_ia32_pat; + u64 host_ia32_efer; + + u64 host_cr0; + u64 host_cr3; + u64 host_cr4; + + u64 host_ia32_sysenter_esp; + u64 host_ia32_sysenter_eip; + u64 host_rip; + u32 host_ia32_sysenter_cs; + + u32 pin_based_vm_exec_control; + u32 vm_exit_controls; + u32 secondary_vm_exec_control; + + u64 io_bitmap_a; + u64 io_bitmap_b; + u64 msr_bitmap; + + u16 guest_es_selector; + u16 guest_cs_selector; + u16 guest_ss_selector; + u16 guest_ds_selector; + u16 guest_fs_selector; + u16 guest_gs_selector; + u16 guest_ldtr_selector; + u16 guest_tr_selector; + + u32 guest_es_limit; + u32 guest_cs_limit; + u32 guest_ss_limit; + u32 guest_ds_limit; + u32 guest_fs_limit; + u32 guest_gs_limit; + u32 guest_ldtr_limit; + u32 guest_tr_limit; + u32 guest_gdtr_limit; + u32 guest_idtr_limit; + + u32 guest_es_ar_bytes; + u32 guest_cs_ar_bytes; + u32 guest_ss_ar_bytes; + u32 guest_ds_ar_bytes; + u32 guest_fs_ar_bytes; + u32 guest_gs_ar_bytes; + u32 guest_ldtr_ar_bytes; + u32 guest_tr_ar_bytes; + + u64 guest_es_base; + u64 guest_cs_base; + u64 guest_ss_base; + u64 guest_ds_base; + u64 guest_fs_base; + u64 guest_gs_base; + u64 guest_ldtr_base; + u64 guest_tr_base; + u64 guest_gdtr_base; + u64 guest_idtr_base; + + u64 padding64_1[3]; + + u64 vm_exit_msr_store_addr; + u64 vm_exit_msr_load_addr; + u64 vm_entry_msr_load_addr; + + u64 cr3_target_value0; + u64 cr3_target_value1; + u64 cr3_target_value2; + u64 cr3_target_value3; + + u32 page_fault_error_code_mask; + u32 page_fault_error_code_match; + + u32 cr3_target_count; + u32 vm_exit_msr_store_count; + u32 vm_exit_msr_load_count; + u32 vm_entry_msr_load_count; + + u64 tsc_offset; + u64 virtual_apic_page_addr; + u64 vmcs_link_pointer; + + u64 guest_ia32_debugctl; + u64 guest_ia32_pat; + u64 guest_ia32_efer; + + u64 guest_pdptr0; + u64 guest_pdptr1; + u64 guest_pdptr2; + u64 guest_pdptr3; + + u64 guest_pending_dbg_exceptions; + u64 guest_sysenter_esp; + u64 guest_sysenter_eip; + + u32 guest_activity_state; + u32 guest_sysenter_cs; + + u64 cr0_guest_host_mask; + u64 cr4_guest_host_mask; + u64 cr0_read_shadow; + u64 cr4_read_shadow; + u64 guest_cr0; + u64 guest_cr3; + u64 guest_cr4; + u64 guest_dr7; + + u64 host_fs_base; + u64 host_gs_base; + u64 host_tr_base; + u64 host_gdtr_base; + u64 host_idtr_base; + u64 host_rsp; + + u64 ept_pointer; + + u16 virtual_processor_id; + u16 padding16[3]; + + u64 padding64_2[5]; + u64 guest_physical_address; + + u32 vm_instruction_error; + u32 vm_exit_reason; + u32 vm_exit_intr_info; + u32 vm_exit_intr_error_code; + u32 idt_vectoring_info_field; + u32 idt_vectoring_error_code; + u32 vm_exit_instruction_len; + u32 vmx_instruction_info; + + u64 exit_qualification; + u64 exit_io_instruction_ecx; + u64 exit_io_instruction_esi; + u64 exit_io_instruction_edi; + u64 exit_io_instruction_eip; + + u64 guest_linear_address; + u64 guest_rsp; + u64 guest_rflags; + + u32 guest_interruptibility_info; + u32 cpu_based_vm_exec_control; + u32 exception_bitmap; + u32 vm_entry_controls; + u32 vm_entry_intr_info_field; + u32 vm_entry_exception_error_code; + u32 vm_entry_instruction_len; + u32 tpr_threshold; + + u64 guest_rip; + + u32 hv_clean_fields; + u32 hv_padding_32; + u32 hv_synthetic_controls; + struct { + u32 nested_flush_hypercall:1; + u32 msr_bitmap:1; + u32 reserved:30; + } hv_enlightenments_control; + u32 hv_vp_id; + + u64 hv_vm_id; + u64 partition_assist_page; + u64 padding64_4[4]; + u64 guest_bndcfgs; + u64 padding64_5[7]; + u64 xss_exit_bitmap; + u64 padding64_6[7]; +}; + +#define HV_X64_MSR_VP_ASSIST_PAGE 0x40000073 +#define HV_X64_MSR_VP_ASSIST_PAGE_ENABLE 0x00000001 +#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT 12 +#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK \ + (~((1ull << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT) - 1)) + +struct hv_enlightened_vmcs *current_evmcs; +struct hv_vp_assist_page *current_vp_assist; + +static inline int enable_vp_assist(uint64_t vp_assist_pa, void *vp_assist) +{ + u64 val = (vp_assist_pa & HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK) | + HV_X64_MSR_VP_ASSIST_PAGE_ENABLE; + + wrmsr(HV_X64_MSR_VP_ASSIST_PAGE, val); + + current_vp_assist = vp_assist; + + enable_evmcs = true; + + return 0; +} + +static inline int evmcs_vmptrld(uint64_t vmcs_pa, void *vmcs) +{ + current_vp_assist->current_nested_vmcs = vmcs_pa; + current_vp_assist->enlighten_vmentry = 1; + + current_evmcs = vmcs; + + return 0; +} + +static inline int evmcs_vmptrst(uint64_t *value) +{ + *value = current_vp_assist->current_nested_vmcs & + ~HV_X64_MSR_VP_ASSIST_PAGE_ENABLE; + + return 0; +} + +static inline int evmcs_vmread(uint64_t encoding, uint64_t *value) +{ + switch (encoding) { + case GUEST_RIP: + *value = current_evmcs->guest_rip; + break; + case GUEST_RSP: + *value = current_evmcs->guest_rsp; + break; + case GUEST_RFLAGS: + *value = current_evmcs->guest_rflags; + break; + case HOST_IA32_PAT: + *value = current_evmcs->host_ia32_pat; + break; + case HOST_IA32_EFER: + *value = current_evmcs->host_ia32_efer; + break; + case HOST_CR0: + *value = current_evmcs->host_cr0; + break; + case HOST_CR3: + *value = current_evmcs->host_cr3; + break; + case HOST_CR4: + *value = current_evmcs->host_cr4; + break; + case HOST_IA32_SYSENTER_ESP: + *value = current_evmcs->host_ia32_sysenter_esp; + break; + case HOST_IA32_SYSENTER_EIP: + *value = current_evmcs->host_ia32_sysenter_eip; + break; + case HOST_RIP: + *value = current_evmcs->host_rip; + break; + case IO_BITMAP_A: + *value = current_evmcs->io_bitmap_a; + break; + case IO_BITMAP_B: + *value = current_evmcs->io_bitmap_b; + break; + case MSR_BITMAP: + *value = current_evmcs->msr_bitmap; + break; + case GUEST_ES_BASE: + *value = current_evmcs->guest_es_base; + break; + case GUEST_CS_BASE: + *value = current_evmcs->guest_cs_base; + break; + case GUEST_SS_BASE: + *value = current_evmcs->guest_ss_base; + break; + case GUEST_DS_BASE: + *value = current_evmcs->guest_ds_base; + break; + case GUEST_FS_BASE: + *value = current_evmcs->guest_fs_base; + break; + case GUEST_GS_BASE: + *value = current_evmcs->guest_gs_base; + break; + case GUEST_LDTR_BASE: + *value = current_evmcs->guest_ldtr_base; + break; + case GUEST_TR_BASE: + *value = current_evmcs->guest_tr_base; + break; + case GUEST_GDTR_BASE: + *value = current_evmcs->guest_gdtr_base; + break; + case GUEST_IDTR_BASE: + *value = current_evmcs->guest_idtr_base; + break; + case TSC_OFFSET: + *value = current_evmcs->tsc_offset; + break; + case VIRTUAL_APIC_PAGE_ADDR: + *value = current_evmcs->virtual_apic_page_addr; + break; + case VMCS_LINK_POINTER: + *value = current_evmcs->vmcs_link_pointer; + break; + case GUEST_IA32_DEBUGCTL: + *value = current_evmcs->guest_ia32_debugctl; + break; + case GUEST_IA32_PAT: + *value = current_evmcs->guest_ia32_pat; + break; + case GUEST_IA32_EFER: + *value = current_evmcs->guest_ia32_efer; + break; + case GUEST_PDPTR0: + *value = current_evmcs->guest_pdptr0; + break; + case GUEST_PDPTR1: + *value = current_evmcs->guest_pdptr1; + break; + case GUEST_PDPTR2: + *value = current_evmcs->guest_pdptr2; + break; + case GUEST_PDPTR3: + *value = current_evmcs->guest_pdptr3; + break; + case GUEST_PENDING_DBG_EXCEPTIONS: + *value = current_evmcs->guest_pending_dbg_exceptions; + break; + case GUEST_SYSENTER_ESP: + *value = current_evmcs->guest_sysenter_esp; + break; + case GUEST_SYSENTER_EIP: + *value = current_evmcs->guest_sysenter_eip; + break; + case CR0_GUEST_HOST_MASK: + *value = current_evmcs->cr0_guest_host_mask; + break; + case CR4_GUEST_HOST_MASK: + *value = current_evmcs->cr4_guest_host_mask; + break; + case CR0_READ_SHADOW: + *value = current_evmcs->cr0_read_shadow; + break; + case CR4_READ_SHADOW: + *value = current_evmcs->cr4_read_shadow; + break; + case GUEST_CR0: + *value = current_evmcs->guest_cr0; + break; + case GUEST_CR3: + *value = current_evmcs->guest_cr3; + break; + case GUEST_CR4: + *value = current_evmcs->guest_cr4; + break; + case GUEST_DR7: + *value = current_evmcs->guest_dr7; + break; + case HOST_FS_BASE: + *value = current_evmcs->host_fs_base; + break; + case HOST_GS_BASE: + *value = current_evmcs->host_gs_base; + break; + case HOST_TR_BASE: + *value = current_evmcs->host_tr_base; + break; + case HOST_GDTR_BASE: + *value = current_evmcs->host_gdtr_base; + break; + case HOST_IDTR_BASE: + *value = current_evmcs->host_idtr_base; + break; + case HOST_RSP: + *value = current_evmcs->host_rsp; + break; + case EPT_POINTER: + *value = current_evmcs->ept_pointer; + break; + case GUEST_BNDCFGS: + *value = current_evmcs->guest_bndcfgs; + break; + case XSS_EXIT_BITMAP: + *value = current_evmcs->xss_exit_bitmap; + break; + case GUEST_PHYSICAL_ADDRESS: + *value = current_evmcs->guest_physical_address; + break; + case EXIT_QUALIFICATION: + *value = current_evmcs->exit_qualification; + break; + case GUEST_LINEAR_ADDRESS: + *value = current_evmcs->guest_linear_address; + break; + case VM_EXIT_MSR_STORE_ADDR: + *value = current_evmcs->vm_exit_msr_store_addr; + break; + case VM_EXIT_MSR_LOAD_ADDR: + *value = current_evmcs->vm_exit_msr_load_addr; + break; + case VM_ENTRY_MSR_LOAD_ADDR: + *value = current_evmcs->vm_entry_msr_load_addr; + break; + case CR3_TARGET_VALUE0: + *value = current_evmcs->cr3_target_value0; + break; + case CR3_TARGET_VALUE1: + *value = current_evmcs->cr3_target_value1; + break; + case CR3_TARGET_VALUE2: + *value = current_evmcs->cr3_target_value2; + break; + case CR3_TARGET_VALUE3: + *value = current_evmcs->cr3_target_value3; + break; + case TPR_THRESHOLD: + *value = current_evmcs->tpr_threshold; + break; + case GUEST_INTERRUPTIBILITY_INFO: + *value = current_evmcs->guest_interruptibility_info; + break; + case CPU_BASED_VM_EXEC_CONTROL: + *value = current_evmcs->cpu_based_vm_exec_control; + break; + case EXCEPTION_BITMAP: + *value = current_evmcs->exception_bitmap; + break; + case VM_ENTRY_CONTROLS: + *value = current_evmcs->vm_entry_controls; + break; + case VM_ENTRY_INTR_INFO_FIELD: + *value = current_evmcs->vm_entry_intr_info_field; + break; + case VM_ENTRY_EXCEPTION_ERROR_CODE: + *value = current_evmcs->vm_entry_exception_error_code; + break; + case VM_ENTRY_INSTRUCTION_LEN: + *value = current_evmcs->vm_entry_instruction_len; + break; + case HOST_IA32_SYSENTER_CS: + *value = current_evmcs->host_ia32_sysenter_cs; + break; + case PIN_BASED_VM_EXEC_CONTROL: + *value = current_evmcs->pin_based_vm_exec_control; + break; + case VM_EXIT_CONTROLS: + *value = current_evmcs->vm_exit_controls; + break; + case SECONDARY_VM_EXEC_CONTROL: + *value = current_evmcs->secondary_vm_exec_control; + break; + case GUEST_ES_LIMIT: + *value = current_evmcs->guest_es_limit; + break; + case GUEST_CS_LIMIT: + *value = current_evmcs->guest_cs_limit; + break; + case GUEST_SS_LIMIT: + *value = current_evmcs->guest_ss_limit; + break; + case GUEST_DS_LIMIT: + *value = current_evmcs->guest_ds_limit; + break; + case GUEST_FS_LIMIT: + *value = current_evmcs->guest_fs_limit; + break; + case GUEST_GS_LIMIT: + *value = current_evmcs->guest_gs_limit; + break; + case GUEST_LDTR_LIMIT: + *value = current_evmcs->guest_ldtr_limit; + break; + case GUEST_TR_LIMIT: + *value = current_evmcs->guest_tr_limit; + break; + case GUEST_GDTR_LIMIT: + *value = current_evmcs->guest_gdtr_limit; + break; + case GUEST_IDTR_LIMIT: + *value = current_evmcs->guest_idtr_limit; + break; + case GUEST_ES_AR_BYTES: + *value = current_evmcs->guest_es_ar_bytes; + break; + case GUEST_CS_AR_BYTES: + *value = current_evmcs->guest_cs_ar_bytes; + break; + case GUEST_SS_AR_BYTES: + *value = current_evmcs->guest_ss_ar_bytes; + break; + case GUEST_DS_AR_BYTES: + *value = current_evmcs->guest_ds_ar_bytes; + break; + case GUEST_FS_AR_BYTES: + *value = current_evmcs->guest_fs_ar_bytes; + break; + case GUEST_GS_AR_BYTES: + *value = current_evmcs->guest_gs_ar_bytes; + break; + case GUEST_LDTR_AR_BYTES: + *value = current_evmcs->guest_ldtr_ar_bytes; + break; + case GUEST_TR_AR_BYTES: + *value = current_evmcs->guest_tr_ar_bytes; + break; + case GUEST_ACTIVITY_STATE: + *value = current_evmcs->guest_activity_state; + break; + case GUEST_SYSENTER_CS: + *value = current_evmcs->guest_sysenter_cs; + break; + case VM_INSTRUCTION_ERROR: + *value = current_evmcs->vm_instruction_error; + break; + case VM_EXIT_REASON: + *value = current_evmcs->vm_exit_reason; + break; + case VM_EXIT_INTR_INFO: + *value = current_evmcs->vm_exit_intr_info; + break; + case VM_EXIT_INTR_ERROR_CODE: + *value = current_evmcs->vm_exit_intr_error_code; + break; + case IDT_VECTORING_INFO_FIELD: + *value = current_evmcs->idt_vectoring_info_field; + break; + case IDT_VECTORING_ERROR_CODE: + *value = current_evmcs->idt_vectoring_error_code; + break; + case VM_EXIT_INSTRUCTION_LEN: + *value = current_evmcs->vm_exit_instruction_len; + break; + case VMX_INSTRUCTION_INFO: + *value = current_evmcs->vmx_instruction_info; + break; + case PAGE_FAULT_ERROR_CODE_MASK: + *value = current_evmcs->page_fault_error_code_mask; + break; + case PAGE_FAULT_ERROR_CODE_MATCH: + *value = current_evmcs->page_fault_error_code_match; + break; + case CR3_TARGET_COUNT: + *value = current_evmcs->cr3_target_count; + break; + case VM_EXIT_MSR_STORE_COUNT: + *value = current_evmcs->vm_exit_msr_store_count; + break; + case VM_EXIT_MSR_LOAD_COUNT: + *value = current_evmcs->vm_exit_msr_load_count; + break; + case VM_ENTRY_MSR_LOAD_COUNT: + *value = current_evmcs->vm_entry_msr_load_count; + break; + case HOST_ES_SELECTOR: + *value = current_evmcs->host_es_selector; + break; + case HOST_CS_SELECTOR: + *value = current_evmcs->host_cs_selector; + break; + case HOST_SS_SELECTOR: + *value = current_evmcs->host_ss_selector; + break; + case HOST_DS_SELECTOR: + *value = current_evmcs->host_ds_selector; + break; + case HOST_FS_SELECTOR: + *value = current_evmcs->host_fs_selector; + break; + case HOST_GS_SELECTOR: + *value = current_evmcs->host_gs_selector; + break; + case HOST_TR_SELECTOR: + *value = current_evmcs->host_tr_selector; + break; + case GUEST_ES_SELECTOR: + *value = current_evmcs->guest_es_selector; + break; + case GUEST_CS_SELECTOR: + *value = current_evmcs->guest_cs_selector; + break; + case GUEST_SS_SELECTOR: + *value = current_evmcs->guest_ss_selector; + break; + case GUEST_DS_SELECTOR: + *value = current_evmcs->guest_ds_selector; + break; + case GUEST_FS_SELECTOR: + *value = current_evmcs->guest_fs_selector; + break; + case GUEST_GS_SELECTOR: + *value = current_evmcs->guest_gs_selector; + break; + case GUEST_LDTR_SELECTOR: + *value = current_evmcs->guest_ldtr_selector; + break; + case GUEST_TR_SELECTOR: + *value = current_evmcs->guest_tr_selector; + break; + case VIRTUAL_PROCESSOR_ID: + *value = current_evmcs->virtual_processor_id; + break; + default: return 1; + } + + return 0; +} + +static inline int evmcs_vmwrite(uint64_t encoding, uint64_t value) +{ + switch (encoding) { + case GUEST_RIP: + current_evmcs->guest_rip = value; + break; + case GUEST_RSP: + current_evmcs->guest_rsp = value; + break; + case GUEST_RFLAGS: + current_evmcs->guest_rflags = value; + break; + case HOST_IA32_PAT: + current_evmcs->host_ia32_pat = value; + break; + case HOST_IA32_EFER: + current_evmcs->host_ia32_efer = value; + break; + case HOST_CR0: + current_evmcs->host_cr0 = value; + break; + case HOST_CR3: + current_evmcs->host_cr3 = value; + break; + case HOST_CR4: + current_evmcs->host_cr4 = value; + break; + case HOST_IA32_SYSENTER_ESP: + current_evmcs->host_ia32_sysenter_esp = value; + break; + case HOST_IA32_SYSENTER_EIP: + current_evmcs->host_ia32_sysenter_eip = value; + break; + case HOST_RIP: + current_evmcs->host_rip = value; + break; + case IO_BITMAP_A: + current_evmcs->io_bitmap_a = value; + break; + case IO_BITMAP_B: + current_evmcs->io_bitmap_b = value; + break; + case MSR_BITMAP: + current_evmcs->msr_bitmap = value; + break; + case GUEST_ES_BASE: + current_evmcs->guest_es_base = value; + break; + case GUEST_CS_BASE: + current_evmcs->guest_cs_base = value; + break; + case GUEST_SS_BASE: + current_evmcs->guest_ss_base = value; + break; + case GUEST_DS_BASE: + current_evmcs->guest_ds_base = value; + break; + case GUEST_FS_BASE: + current_evmcs->guest_fs_base = value; + break; + case GUEST_GS_BASE: + current_evmcs->guest_gs_base = value; + break; + case GUEST_LDTR_BASE: + current_evmcs->guest_ldtr_base = value; + break; + case GUEST_TR_BASE: + current_evmcs->guest_tr_base = value; + break; + case GUEST_GDTR_BASE: + current_evmcs->guest_gdtr_base = value; + break; + case GUEST_IDTR_BASE: + current_evmcs->guest_idtr_base = value; + break; + case TSC_OFFSET: + current_evmcs->tsc_offset = value; + break; + case VIRTUAL_APIC_PAGE_ADDR: + current_evmcs->virtual_apic_page_addr = value; + break; + case VMCS_LINK_POINTER: + current_evmcs->vmcs_link_pointer = value; + break; + case GUEST_IA32_DEBUGCTL: + current_evmcs->guest_ia32_debugctl = value; + break; + case GUEST_IA32_PAT: + current_evmcs->guest_ia32_pat = value; + break; + case GUEST_IA32_EFER: + current_evmcs->guest_ia32_efer = value; + break; + case GUEST_PDPTR0: + current_evmcs->guest_pdptr0 = value; + break; + case GUEST_PDPTR1: + current_evmcs->guest_pdptr1 = value; + break; + case GUEST_PDPTR2: + current_evmcs->guest_pdptr2 = value; + break; + case GUEST_PDPTR3: + current_evmcs->guest_pdptr3 = value; + break; + case GUEST_PENDING_DBG_EXCEPTIONS: + current_evmcs->guest_pending_dbg_exceptions = value; + break; + case GUEST_SYSENTER_ESP: + current_evmcs->guest_sysenter_esp = value; + break; + case GUEST_SYSENTER_EIP: + current_evmcs->guest_sysenter_eip = value; + break; + case CR0_GUEST_HOST_MASK: + current_evmcs->cr0_guest_host_mask = value; + break; + case CR4_GUEST_HOST_MASK: + current_evmcs->cr4_guest_host_mask = value; + break; + case CR0_READ_SHADOW: + current_evmcs->cr0_read_shadow = value; + break; + case CR4_READ_SHADOW: + current_evmcs->cr4_read_shadow = value; + break; + case GUEST_CR0: + current_evmcs->guest_cr0 = value; + break; + case GUEST_CR3: + current_evmcs->guest_cr3 = value; + break; + case GUEST_CR4: + current_evmcs->guest_cr4 = value; + break; + case GUEST_DR7: + current_evmcs->guest_dr7 = value; + break; + case HOST_FS_BASE: + current_evmcs->host_fs_base = value; + break; + case HOST_GS_BASE: + current_evmcs->host_gs_base = value; + break; + case HOST_TR_BASE: + current_evmcs->host_tr_base = value; + break; + case HOST_GDTR_BASE: + current_evmcs->host_gdtr_base = value; + break; + case HOST_IDTR_BASE: + current_evmcs->host_idtr_base = value; + break; + case HOST_RSP: + current_evmcs->host_rsp = value; + break; + case EPT_POINTER: + current_evmcs->ept_pointer = value; + break; + case GUEST_BNDCFGS: + current_evmcs->guest_bndcfgs = value; + break; + case XSS_EXIT_BITMAP: + current_evmcs->xss_exit_bitmap = value; + break; + case GUEST_PHYSICAL_ADDRESS: + current_evmcs->guest_physical_address = value; + break; + case EXIT_QUALIFICATION: + current_evmcs->exit_qualification = value; + break; + case GUEST_LINEAR_ADDRESS: + current_evmcs->guest_linear_address = value; + break; + case VM_EXIT_MSR_STORE_ADDR: + current_evmcs->vm_exit_msr_store_addr = value; + break; + case VM_EXIT_MSR_LOAD_ADDR: + current_evmcs->vm_exit_msr_load_addr = value; + break; + case VM_ENTRY_MSR_LOAD_ADDR: + current_evmcs->vm_entry_msr_load_addr = value; + break; + case CR3_TARGET_VALUE0: + current_evmcs->cr3_target_value0 = value; + break; + case CR3_TARGET_VALUE1: + current_evmcs->cr3_target_value1 = value; + break; + case CR3_TARGET_VALUE2: + current_evmcs->cr3_target_value2 = value; + break; + case CR3_TARGET_VALUE3: + current_evmcs->cr3_target_value3 = value; + break; + case TPR_THRESHOLD: + current_evmcs->tpr_threshold = value; + break; + case GUEST_INTERRUPTIBILITY_INFO: + current_evmcs->guest_interruptibility_info = value; + break; + case CPU_BASED_VM_EXEC_CONTROL: + current_evmcs->cpu_based_vm_exec_control = value; + break; + case EXCEPTION_BITMAP: + current_evmcs->exception_bitmap = value; + break; + case VM_ENTRY_CONTROLS: + current_evmcs->vm_entry_controls = value; + break; + case VM_ENTRY_INTR_INFO_FIELD: + current_evmcs->vm_entry_intr_info_field = value; + break; + case VM_ENTRY_EXCEPTION_ERROR_CODE: + current_evmcs->vm_entry_exception_error_code = value; + break; + case VM_ENTRY_INSTRUCTION_LEN: + current_evmcs->vm_entry_instruction_len = value; + break; + case HOST_IA32_SYSENTER_CS: + current_evmcs->host_ia32_sysenter_cs = value; + break; + case PIN_BASED_VM_EXEC_CONTROL: + current_evmcs->pin_based_vm_exec_control = value; + break; + case VM_EXIT_CONTROLS: + current_evmcs->vm_exit_controls = value; + break; + case SECONDARY_VM_EXEC_CONTROL: + current_evmcs->secondary_vm_exec_control = value; + break; + case GUEST_ES_LIMIT: + current_evmcs->guest_es_limit = value; + break; + case GUEST_CS_LIMIT: + current_evmcs->guest_cs_limit = value; + break; + case GUEST_SS_LIMIT: + current_evmcs->guest_ss_limit = value; + break; + case GUEST_DS_LIMIT: + current_evmcs->guest_ds_limit = value; + break; + case GUEST_FS_LIMIT: + current_evmcs->guest_fs_limit = value; + break; + case GUEST_GS_LIMIT: + current_evmcs->guest_gs_limit = value; + break; + case GUEST_LDTR_LIMIT: + current_evmcs->guest_ldtr_limit = value; + break; + case GUEST_TR_LIMIT: + current_evmcs->guest_tr_limit = value; + break; + case GUEST_GDTR_LIMIT: + current_evmcs->guest_gdtr_limit = value; + break; + case GUEST_IDTR_LIMIT: + current_evmcs->guest_idtr_limit = value; + break; + case GUEST_ES_AR_BYTES: + current_evmcs->guest_es_ar_bytes = value; + break; + case GUEST_CS_AR_BYTES: + current_evmcs->guest_cs_ar_bytes = value; + break; + case GUEST_SS_AR_BYTES: + current_evmcs->guest_ss_ar_bytes = value; + break; + case GUEST_DS_AR_BYTES: + current_evmcs->guest_ds_ar_bytes = value; + break; + case GUEST_FS_AR_BYTES: + current_evmcs->guest_fs_ar_bytes = value; + break; + case GUEST_GS_AR_BYTES: + current_evmcs->guest_gs_ar_bytes = value; + break; + case GUEST_LDTR_AR_BYTES: + current_evmcs->guest_ldtr_ar_bytes = value; + break; + case GUEST_TR_AR_BYTES: + current_evmcs->guest_tr_ar_bytes = value; + break; + case GUEST_ACTIVITY_STATE: + current_evmcs->guest_activity_state = value; + break; + case GUEST_SYSENTER_CS: + current_evmcs->guest_sysenter_cs = value; + break; + case VM_INSTRUCTION_ERROR: + current_evmcs->vm_instruction_error = value; + break; + case VM_EXIT_REASON: + current_evmcs->vm_exit_reason = value; + break; + case VM_EXIT_INTR_INFO: + current_evmcs->vm_exit_intr_info = value; + break; + case VM_EXIT_INTR_ERROR_CODE: + current_evmcs->vm_exit_intr_error_code = value; + break; + case IDT_VECTORING_INFO_FIELD: + current_evmcs->idt_vectoring_info_field = value; + break; + case IDT_VECTORING_ERROR_CODE: + current_evmcs->idt_vectoring_error_code = value; + break; + case VM_EXIT_INSTRUCTION_LEN: + current_evmcs->vm_exit_instruction_len = value; + break; + case VMX_INSTRUCTION_INFO: + current_evmcs->vmx_instruction_info = value; + break; + case PAGE_FAULT_ERROR_CODE_MASK: + current_evmcs->page_fault_error_code_mask = value; + break; + case PAGE_FAULT_ERROR_CODE_MATCH: + current_evmcs->page_fault_error_code_match = value; + break; + case CR3_TARGET_COUNT: + current_evmcs->cr3_target_count = value; + break; + case VM_EXIT_MSR_STORE_COUNT: + current_evmcs->vm_exit_msr_store_count = value; + break; + case VM_EXIT_MSR_LOAD_COUNT: + current_evmcs->vm_exit_msr_load_count = value; + break; + case VM_ENTRY_MSR_LOAD_COUNT: + current_evmcs->vm_entry_msr_load_count = value; + break; + case HOST_ES_SELECTOR: + current_evmcs->host_es_selector = value; + break; + case HOST_CS_SELECTOR: + current_evmcs->host_cs_selector = value; + break; + case HOST_SS_SELECTOR: + current_evmcs->host_ss_selector = value; + break; + case HOST_DS_SELECTOR: + current_evmcs->host_ds_selector = value; + break; + case HOST_FS_SELECTOR: + current_evmcs->host_fs_selector = value; + break; + case HOST_GS_SELECTOR: + current_evmcs->host_gs_selector = value; + break; + case HOST_TR_SELECTOR: + current_evmcs->host_tr_selector = value; + break; + case GUEST_ES_SELECTOR: + current_evmcs->guest_es_selector = value; + break; + case GUEST_CS_SELECTOR: + current_evmcs->guest_cs_selector = value; + break; + case GUEST_SS_SELECTOR: + current_evmcs->guest_ss_selector = value; + break; + case GUEST_DS_SELECTOR: + current_evmcs->guest_ds_selector = value; + break; + case GUEST_FS_SELECTOR: + current_evmcs->guest_fs_selector = value; + break; + case GUEST_GS_SELECTOR: + current_evmcs->guest_gs_selector = value; + break; + case GUEST_LDTR_SELECTOR: + current_evmcs->guest_ldtr_selector = value; + break; + case GUEST_TR_SELECTOR: + current_evmcs->guest_tr_selector = value; + break; + case VIRTUAL_PROCESSOR_ID: + current_evmcs->virtual_processor_id = value; + break; + default: return 1; + } + + return 0; +} + +static inline int evmcs_vmlaunch(void) +{ + int ret; + + current_evmcs->hv_clean_fields = 0; + + __asm__ __volatile__("push %%rbp;" + "push %%rcx;" + "push %%rdx;" + "push %%rsi;" + "push %%rdi;" + "push $0;" + "mov %%rsp, (%[host_rsp]);" + "lea 1f(%%rip), %%rax;" + "mov %%rax, (%[host_rip]);" + "vmlaunch;" + "incq (%%rsp);" + "1: pop %%rax;" + "pop %%rdi;" + "pop %%rsi;" + "pop %%rdx;" + "pop %%rcx;" + "pop %%rbp;" + : [ret]"=&a"(ret) + : [host_rsp]"r" + ((uint64_t)¤t_evmcs->host_rsp), + [host_rip]"r" + ((uint64_t)¤t_evmcs->host_rip) + : "memory", "cc", "rbx", "r8", "r9", "r10", + "r11", "r12", "r13", "r14", "r15"); + return ret; +} + +/* + * No guest state (e.g. GPRs) is established by this vmresume. + */ +static inline int evmcs_vmresume(void) +{ + int ret; + + current_evmcs->hv_clean_fields = 0; + + __asm__ __volatile__("push %%rbp;" + "push %%rcx;" + "push %%rdx;" + "push %%rsi;" + "push %%rdi;" + "push $0;" + "mov %%rsp, (%[host_rsp]);" + "lea 1f(%%rip), %%rax;" + "mov %%rax, (%[host_rip]);" + "vmresume;" + "incq (%%rsp);" + "1: pop %%rax;" + "pop %%rdi;" + "pop %%rsi;" + "pop %%rdx;" + "pop %%rcx;" + "pop %%rbp;" + : [ret]"=&a"(ret) + : [host_rsp]"r" + ((uint64_t)¤t_evmcs->host_rsp), + [host_rip]"r" + ((uint64_t)¤t_evmcs->host_rip) + : "memory", "cc", "rbx", "r8", "r9", "r10", + "r11", "r12", "r13", "r14", "r15"); + return ret; +} + +#endif /* !SELFTEST_KVM_EVMCS_H */ diff --git a/tools/testing/selftests/kvm/include/x86_64/vmx.h b/tools/testing/selftests/kvm/include/x86_64/vmx.h index 4bbee8560292..c9bd935b939c 100644 --- a/tools/testing/selftests/kvm/include/x86_64/vmx.h +++ b/tools/testing/selftests/kvm/include/x86_64/vmx.h @@ -339,6 +339,8 @@ struct vmx_msr_entry { uint64_t value; } __attribute__ ((aligned(16))); +#include "evmcs.h" + static inline int vmxon(uint64_t phys) { uint8_t ret; @@ -372,6 +374,9 @@ static inline int vmptrld(uint64_t vmcs_pa) { uint8_t ret; + if (enable_evmcs) + return -1; + __asm__ __volatile__ ("vmptrld %[pa]; setna %[ret]" : [ret]"=rm"(ret) : [pa]"m"(vmcs_pa) @@ -385,6 +390,9 @@ static inline int vmptrst(uint64_t *value) uint64_t tmp; uint8_t ret; + if (enable_evmcs) + return evmcs_vmptrst(value); + __asm__ __volatile__("vmptrst %[value]; setna %[ret]" : [value]"=m"(tmp), [ret]"=rm"(ret) : : "cc", "memory"); @@ -411,6 +419,9 @@ static inline int vmlaunch(void) { int ret; + if (enable_evmcs) + return evmcs_vmlaunch(); + __asm__ __volatile__("push %%rbp;" "push %%rcx;" "push %%rdx;" @@ -443,6 +454,9 @@ static inline int vmresume(void) { int ret; + if (enable_evmcs) + return evmcs_vmresume(); + __asm__ __volatile__("push %%rbp;" "push %%rcx;" "push %%rdx;" @@ -482,6 +496,9 @@ static inline int vmread(uint64_t encoding, uint64_t *value) uint64_t tmp; uint8_t ret; + if (enable_evmcs) + return evmcs_vmread(encoding, value); + __asm__ __volatile__("vmread %[encoding], %[value]; setna %[ret]" : [value]"=rm"(tmp), [ret]"=rm"(ret) : [encoding]"r"(encoding) @@ -506,6 +523,9 @@ static inline int vmwrite(uint64_t encoding, uint64_t value) { uint8_t ret; + if (enable_evmcs) + return evmcs_vmwrite(encoding, value); + __asm__ __volatile__ ("vmwrite %[value], %[encoding]; setna %[ret]" : [ret]"=rm"(ret) : [value]"rm"(value), [encoding]"r"(encoding) @@ -543,6 +563,14 @@ struct vmx_pages { void *vmwrite_hva; uint64_t vmwrite_gpa; void *vmwrite; + + void *vp_assist_hva; + uint64_t vp_assist_gpa; + void *vp_assist; + + void *enlightened_vmcs_hva; + uint64_t enlightened_vmcs_gpa; + void *enlightened_vmcs; }; struct vmx_pages *vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva); diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c index cc356da9b3d8..771ba6bf751c 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c +++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c @@ -13,6 +13,8 @@ #include "processor.h" #include "vmx.h" +bool enable_evmcs; + /* Allocate memory regions for nested VMX tests. * * Input Args: @@ -62,6 +64,20 @@ vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva) vmx->vmwrite_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmwrite); memset(vmx->vmwrite_hva, 0, getpagesize()); + /* Setup of a region of guest memory for the VP Assist page. */ + vmx->vp_assist = (void *)vm_vaddr_alloc(vm, getpagesize(), + 0x10000, 0, 0); + vmx->vp_assist_hva = addr_gva2hva(vm, (uintptr_t)vmx->vp_assist); + vmx->vp_assist_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vp_assist); + + /* Setup of a region of guest memory for the enlightened VMCS. */ + vmx->enlightened_vmcs = (void *)vm_vaddr_alloc(vm, getpagesize(), + 0x10000, 0, 0); + vmx->enlightened_vmcs_hva = + addr_gva2hva(vm, (uintptr_t)vmx->enlightened_vmcs); + vmx->enlightened_vmcs_gpa = + addr_gva2gpa(vm, (uintptr_t)vmx->enlightened_vmcs); + *p_vmx_gva = vmx_gva; return vmx; } @@ -112,18 +128,26 @@ bool prepare_for_vmx_operation(struct vmx_pages *vmx) bool load_vmcs(struct vmx_pages *vmx) { - /* Load a VMCS. */ - *(uint32_t *)(vmx->vmcs) = vmcs_revision(); - if (vmclear(vmx->vmcs_gpa)) - return false; - - if (vmptrld(vmx->vmcs_gpa)) - return false; - - /* Setup shadow VMCS, do not load it yet. */ - *(uint32_t *)(vmx->shadow_vmcs) = vmcs_revision() | 0x80000000ul; - if (vmclear(vmx->shadow_vmcs_gpa)) - return false; + if (!enable_evmcs) { + /* Load a VMCS. */ + *(uint32_t *)(vmx->vmcs) = vmcs_revision(); + if (vmclear(vmx->vmcs_gpa)) + return false; + + if (vmptrld(vmx->vmcs_gpa)) + return false; + + /* Setup shadow VMCS, do not load it yet. */ + *(uint32_t *)(vmx->shadow_vmcs) = + vmcs_revision() | 0x80000000ul; + if (vmclear(vmx->shadow_vmcs_gpa)) + return false; + } else { + if (evmcs_vmptrld(vmx->enlightened_vmcs_gpa, + vmx->enlightened_vmcs)) + return false; + current_evmcs->revision_id = vmcs_revision(); + } return true; } diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c new file mode 100644 index 000000000000..92c2cfd1b182 --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c @@ -0,0 +1,160 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2018, Red Hat, Inc. + * + * Tests for Enlightened VMCS, including nested guest state. + */ +#define _GNU_SOURCE /* for program_invocation_short_name */ +#include +#include +#include +#include +#include + +#include "test_util.h" + +#include "kvm_util.h" + +#include "vmx.h" + +#define VCPU_ID 5 + +static bool have_nested_state; + +void l2_guest_code(void) +{ + GUEST_SYNC(6); + + GUEST_SYNC(7); + + /* Done, exit to L1 and never come back. */ + vmcall(); +} + +void l1_guest_code(struct vmx_pages *vmx_pages) +{ +#define L2_GUEST_STACK_SIZE 64 + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + + enable_vp_assist(vmx_pages->vp_assist_gpa, vmx_pages->vp_assist); + + GUEST_ASSERT(vmx_pages->vmcs_gpa); + GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); + GUEST_SYNC(3); + GUEST_ASSERT(load_vmcs(vmx_pages)); + GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa); + + GUEST_SYNC(4); + GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa); + + prepare_vmcs(vmx_pages, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + GUEST_SYNC(5); + GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa); + GUEST_ASSERT(!vmlaunch()); + GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa); + GUEST_SYNC(8); + GUEST_ASSERT(!vmresume()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + GUEST_SYNC(9); +} + +void guest_code(struct vmx_pages *vmx_pages) +{ + GUEST_SYNC(1); + GUEST_SYNC(2); + + if (vmx_pages) + l1_guest_code(vmx_pages); + + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + struct vmx_pages *vmx_pages = NULL; + vm_vaddr_t vmx_pages_gva = 0; + + struct kvm_regs regs1, regs2; + struct kvm_vm *vm; + struct kvm_run *run; + struct kvm_x86_state *state; + struct ucall uc; + int stage; + uint16_t evmcs_ver; + struct kvm_enable_cap enable_evmcs_cap = { + .cap = KVM_CAP_HYPERV_ENLIGHTENED_VMCS, + .args[0] = (unsigned long)&evmcs_ver + }; + + struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1); + + /* Create VM */ + vm = vm_create_default(VCPU_ID, 0, guest_code); + + vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); + + if (!kvm_check_cap(KVM_CAP_NESTED_STATE) || + !kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) { + printf("capabilities not available, skipping test\n"); + exit(KSFT_SKIP); + } + + vcpu_ioctl(vm, VCPU_ID, KVM_ENABLE_CAP, &enable_evmcs_cap); + + run = vcpu_state(vm, VCPU_ID); + + vcpu_regs_get(vm, VCPU_ID, ®s1); + + vmx_pages = vcpu_alloc_vmx(vm, &vmx_pages_gva); + vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva); + + for (stage = 1;; stage++) { + _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Unexpected exit reason: %u (%s),\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + + memset(®s1, 0, sizeof(regs1)); + vcpu_regs_get(vm, VCPU_ID, ®s1); + switch (get_ucall(vm, VCPU_ID, &uc)) { + case UCALL_ABORT: + TEST_ASSERT(false, "%s at %s:%d", (const char *)uc.args[0], + __FILE__, uc.args[1]); + /* NOT REACHED */ + case UCALL_SYNC: + break; + case UCALL_DONE: + goto done; + default: + TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd); + } + + /* UCALL_SYNC is handled here. */ + TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") && + uc.args[1] == stage, "Unexpected register values vmexit #%lx, got %lx", + stage, (ulong)uc.args[1]); + + state = vcpu_save_state(vm, VCPU_ID); + kvm_vm_release(vm); + + /* Restore state in a new VM. */ + kvm_vm_restart(vm, O_RDWR); + vm_vcpu_add(vm, VCPU_ID, 0, 0); + vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); + vcpu_load_state(vm, VCPU_ID, state); + run = vcpu_state(vm, VCPU_ID); + free(state); + + memset(®s2, 0, sizeof(regs2)); + vcpu_regs_get(vm, VCPU_ID, ®s2); + TEST_ASSERT(!memcmp(®s1, ®s2, sizeof(regs2)), + "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx", + (ulong) regs2.rdi, (ulong) regs2.rsi); + } + +done: + kvm_vm_free(vm); +} -- cgit v1.2.3 From 59073aaf6de0d2dacc2603cee6d1d6cd5592ac08 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Tue, 16 Oct 2018 14:29:20 -0700 Subject: kvm: x86: Add exception payload fields to kvm_vcpu_events The per-VM capability KVM_CAP_EXCEPTION_PAYLOAD (to be introduced in a later commit) adds the following fields to struct kvm_vcpu_events: exception_has_payload, exception_payload, and exception.pending. With this capability set, all of the details of vcpu->arch.exception, including the payload for a pending exception, are reported to userspace in response to KVM_GET_VCPU_EVENTS. With this capability clear, the original ABI is preserved, and the exception.injected field is set for either pending or injected exceptions. When userspace calls KVM_SET_VCPU_EVENTS with KVM_CAP_EXCEPTION_PAYLOAD clear, exception.injected is no longer translated to exception.pending. KVM_SET_VCPU_EVENTS can now only establish a pending exception when KVM_CAP_EXCEPTION_PAYLOAD is set. Reported-by: Jim Mattson Suggested-by: Paolo Bonzini Signed-off-by: Jim Mattson Signed-off-by: Paolo Bonzini --- Documentation/virtual/kvm/api.txt | 24 ++++++++++---- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/include/uapi/asm/kvm.h | 7 ++-- arch/x86/kvm/x86.c | 61 ++++++++++++++++++++++++++--------- tools/arch/x86/include/uapi/asm/kvm.h | 10 ++++-- 5 files changed, 77 insertions(+), 26 deletions(-) (limited to 'tools') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index cf9422f4f101..e900ac31501c 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -850,7 +850,7 @@ struct kvm_vcpu_events { __u8 injected; __u8 nr; __u8 has_error_code; - __u8 pad; + __u8 pending; __u32 error_code; } exception; struct { @@ -873,16 +873,23 @@ struct kvm_vcpu_events { __u8 smm_inside_nmi; __u8 latched_init; } smi; - __u32 reserved[9]; + __u8 reserved[27]; + __u8 exception_has_payload; + __u64 exception_payload; }; -Only two fields are defined in the flags field: +The following bits are defined in the flags field: -- KVM_VCPUEVENT_VALID_SHADOW may be set in the flags field to signal that +- KVM_VCPUEVENT_VALID_SHADOW may be set to signal that interrupt.shadow contains a valid state. -- KVM_VCPUEVENT_VALID_SMM may be set in the flags field to signal that - smi contains a valid state. +- KVM_VCPUEVENT_VALID_SMM may be set to signal that smi contains a + valid state. + +- KVM_VCPUEVENT_VALID_PAYLOAD may be set to signal that the + exception_has_payload, exception_payload, and exception.pending + fields contain a valid state. This bit will be set whenever + KVM_CAP_EXCEPTION_PAYLOAD is enabled. ARM/ARM64: @@ -962,6 +969,11 @@ shall be written into the VCPU. KVM_VCPUEVENT_VALID_SMM can only be set if KVM_CAP_X86_SMM is available. +If KVM_CAP_EXCEPTION_PAYLOAD is enabled, KVM_VCPUEVENT_VALID_PAYLOAD +can be set in the flags field to signal that the +exception_has_payload, exception_payload, and exception.pending fields +contain a valid state and shall be written into the VCPU. + ARM/ARM64: Set the pending SError exception state for this VCPU. It is not possible to diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 20f7c994afeb..55e51ff7e421 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -919,6 +919,7 @@ struct kvm_arch { bool x2apic_broadcast_quirk_disabled; bool guest_can_read_msr_platform_info; + bool exception_payload_enabled; }; struct kvm_vm_stat { diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index ab76aa1d3a4d..dabfcf7c3941 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -288,6 +288,7 @@ struct kvm_reinject_control { #define KVM_VCPUEVENT_VALID_SIPI_VECTOR 0x00000002 #define KVM_VCPUEVENT_VALID_SHADOW 0x00000004 #define KVM_VCPUEVENT_VALID_SMM 0x00000008 +#define KVM_VCPUEVENT_VALID_PAYLOAD 0x00000010 /* Interrupt shadow states */ #define KVM_X86_SHADOW_INT_MOV_SS 0x01 @@ -299,7 +300,7 @@ struct kvm_vcpu_events { __u8 injected; __u8 nr; __u8 has_error_code; - __u8 pad; + __u8 pending; __u32 error_code; } exception; struct { @@ -322,7 +323,9 @@ struct kvm_vcpu_events { __u8 smm_inside_nmi; __u8 latched_init; } smi; - __u32 reserved[9]; + __u8 reserved[27]; + __u8 exception_has_payload; + __u64 exception_payload; }; /* for KVM_GET/SET_DEBUGREGS */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 532b660fe497..2ef706574589 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3373,19 +3373,33 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, struct kvm_vcpu_events *events) { process_nmi(vcpu); + /* - * FIXME: pass injected and pending separately. This is only - * needed for nested virtualization, whose state cannot be - * migrated yet. For now we can combine them. + * The API doesn't provide the instruction length for software + * exceptions, so don't report them. As long as the guest RIP + * isn't advanced, we should expect to encounter the exception + * again. */ - events->exception.injected = - (vcpu->arch.exception.pending || - vcpu->arch.exception.injected) && - !kvm_exception_is_soft(vcpu->arch.exception.nr); + if (kvm_exception_is_soft(vcpu->arch.exception.nr)) { + events->exception.injected = 0; + events->exception.pending = 0; + } else { + events->exception.injected = vcpu->arch.exception.injected; + events->exception.pending = vcpu->arch.exception.pending; + /* + * For ABI compatibility, deliberately conflate + * pending and injected exceptions when + * KVM_CAP_EXCEPTION_PAYLOAD isn't enabled. + */ + if (!vcpu->kvm->arch.exception_payload_enabled) + events->exception.injected |= + vcpu->arch.exception.pending; + } events->exception.nr = vcpu->arch.exception.nr; events->exception.has_error_code = vcpu->arch.exception.has_error_code; - events->exception.pad = 0; events->exception.error_code = vcpu->arch.exception.error_code; + events->exception_has_payload = vcpu->arch.exception.has_payload; + events->exception_payload = vcpu->arch.exception.payload; events->interrupt.injected = vcpu->arch.interrupt.injected && !vcpu->arch.interrupt.soft; @@ -3409,6 +3423,9 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING | KVM_VCPUEVENT_VALID_SHADOW | KVM_VCPUEVENT_VALID_SMM); + if (vcpu->kvm->arch.exception_payload_enabled) + events->flags |= KVM_VCPUEVENT_VALID_PAYLOAD; + memset(&events->reserved, 0, sizeof(events->reserved)); } @@ -3420,12 +3437,24 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING | KVM_VCPUEVENT_VALID_SIPI_VECTOR | KVM_VCPUEVENT_VALID_SHADOW - | KVM_VCPUEVENT_VALID_SMM)) + | KVM_VCPUEVENT_VALID_SMM + | KVM_VCPUEVENT_VALID_PAYLOAD)) return -EINVAL; - if (events->exception.injected && - (events->exception.nr > 31 || events->exception.nr == NMI_VECTOR || - is_guest_mode(vcpu))) + if (events->flags & KVM_VCPUEVENT_VALID_PAYLOAD) { + if (!vcpu->kvm->arch.exception_payload_enabled) + return -EINVAL; + if (events->exception.pending) + events->exception.injected = 0; + else + events->exception_has_payload = 0; + } else { + events->exception.pending = 0; + events->exception_has_payload = 0; + } + + if ((events->exception.injected || events->exception.pending) && + (events->exception.nr > 31 || events->exception.nr == NMI_VECTOR)) return -EINVAL; /* INITs are latched while in SMM */ @@ -3435,13 +3464,13 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, return -EINVAL; process_nmi(vcpu); - vcpu->arch.exception.injected = false; - vcpu->arch.exception.pending = events->exception.injected; + vcpu->arch.exception.injected = events->exception.injected; + vcpu->arch.exception.pending = events->exception.pending; vcpu->arch.exception.nr = events->exception.nr; vcpu->arch.exception.has_error_code = events->exception.has_error_code; vcpu->arch.exception.error_code = events->exception.error_code; - vcpu->arch.exception.has_payload = false; - vcpu->arch.exception.payload = 0; + vcpu->arch.exception.has_payload = events->exception_has_payload; + vcpu->arch.exception.payload = events->exception_payload; vcpu->arch.interrupt.injected = events->interrupt.injected; vcpu->arch.interrupt.nr = events->interrupt.nr; diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index 86299efa804a..b09875f580d5 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -288,6 +288,7 @@ struct kvm_reinject_control { #define KVM_VCPUEVENT_VALID_SIPI_VECTOR 0x00000002 #define KVM_VCPUEVENT_VALID_SHADOW 0x00000004 #define KVM_VCPUEVENT_VALID_SMM 0x00000008 +#define KVM_VCPUEVENT_VALID_PAYLOAD 0x00000010 /* Interrupt shadow states */ #define KVM_X86_SHADOW_INT_MOV_SS 0x01 @@ -299,7 +300,10 @@ struct kvm_vcpu_events { __u8 injected; __u8 nr; __u8 has_error_code; - __u8 pad; + union { + __u8 pad; + __u8 pending; + }; __u32 error_code; } exception; struct { @@ -322,7 +326,9 @@ struct kvm_vcpu_events { __u8 smm_inside_nmi; __u8 latched_init; } smi; - __u32 reserved[9]; + __u8 reserved[27]; + __u8 exception_has_payload; + __u64 exception_payload; }; /* for KVM_GET/SET_DEBUGREGS */ -- cgit v1.2.3 From f15ac811e80b857a443476de60ce70d3e6049ae5 Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Fri, 19 Oct 2018 16:38:16 +0200 Subject: selftests/kvm: add missing executables to .gitignore Fixes: 18178ff86217 ("KVM: selftests: add Enlightened VMCS test") Signed-off-by: Anders Roxell Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/.gitignore | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 552b0d9a49f4..6210ba41c29e 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -1,4 +1,5 @@ /x86_64/cr4_cpuid_sync_test +/x86_64/evmcs_test /x86_64/platform_info_test /x86_64/set_sregs_test /x86_64/sync_regs_test -- cgit v1.2.3