diff options
Diffstat (limited to 'tools')
27 files changed, 1243 insertions, 571 deletions
diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm index 0067462edc3e..192694c97356 100644 --- a/tools/testing/selftests/kvm/Makefile.kvm +++ b/tools/testing/selftests/kvm/Makefile.kvm @@ -89,11 +89,13 @@ TEST_GEN_PROGS_x86 += x86/kvm_buslock_test TEST_GEN_PROGS_x86 += x86/monitor_mwait_test TEST_GEN_PROGS_x86 += x86/msrs_test TEST_GEN_PROGS_x86 += x86/nested_close_kvm_test +TEST_GEN_PROGS_x86 += x86/nested_dirty_log_test TEST_GEN_PROGS_x86 += x86/nested_emulation_test TEST_GEN_PROGS_x86 += x86/nested_exceptions_test TEST_GEN_PROGS_x86 += x86/nested_invalid_cr3_test TEST_GEN_PROGS_x86 += x86/nested_tsc_adjust_test TEST_GEN_PROGS_x86 += x86/nested_tsc_scaling_test +TEST_GEN_PROGS_x86 += x86/nested_vmsave_vmload_test TEST_GEN_PROGS_x86 += x86/platform_info_test TEST_GEN_PROGS_x86 += x86/pmu_counters_test TEST_GEN_PROGS_x86 += x86/pmu_event_filter_test @@ -115,7 +117,6 @@ TEST_GEN_PROGS_x86 += x86/ucna_injection_test TEST_GEN_PROGS_x86 += x86/userspace_io_test TEST_GEN_PROGS_x86 += x86/userspace_msr_exit_test TEST_GEN_PROGS_x86 += x86/vmx_apic_access_test -TEST_GEN_PROGS_x86 += x86/vmx_dirty_log_test TEST_GEN_PROGS_x86 += x86/vmx_exception_with_invalid_guest_state TEST_GEN_PROGS_x86 += x86/vmx_msrs_test TEST_GEN_PROGS_x86 += x86/vmx_invalid_nested_guest_state @@ -124,6 +125,7 @@ TEST_GEN_PROGS_x86 += x86/vmx_set_nested_state_test TEST_GEN_PROGS_x86 += x86/apic_bus_clock_test TEST_GEN_PROGS_x86 += x86/xapic_ipi_test TEST_GEN_PROGS_x86 += x86/xapic_state_test +TEST_GEN_PROGS_x86 += x86/xapic_tpr_test TEST_GEN_PROGS_x86 += x86/xcr0_cpuid_test TEST_GEN_PROGS_x86 += x86/xss_msr_test TEST_GEN_PROGS_x86 += x86/debug_regs diff --git a/tools/testing/selftests/kvm/include/arm64/kvm_util_arch.h b/tools/testing/selftests/kvm/include/arm64/kvm_util_arch.h index b973bb2c64a6..4a2033708227 100644 --- a/tools/testing/selftests/kvm/include/arm64/kvm_util_arch.h +++ b/tools/testing/selftests/kvm/include/arm64/kvm_util_arch.h @@ -2,6 +2,8 @@ 
#ifndef SELFTEST_KVM_UTIL_ARCH_H #define SELFTEST_KVM_UTIL_ARCH_H +struct kvm_mmu_arch {}; + struct kvm_vm_arch { bool has_gic; int gic_fd; diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 97f9251eb073..71a7e4401832 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -88,12 +88,19 @@ enum kvm_mem_region_type { NR_MEM_REGIONS, }; +struct kvm_mmu { + bool pgd_created; + uint64_t pgd; + int pgtable_levels; + + struct kvm_mmu_arch arch; +}; + struct kvm_vm { int mode; unsigned long type; int kvm_fd; int fd; - unsigned int pgtable_levels; unsigned int page_size; unsigned int page_shift; unsigned int pa_bits; @@ -104,13 +111,18 @@ struct kvm_vm { struct sparsebit *vpages_valid; struct sparsebit *vpages_mapped; bool has_irqchip; - bool pgd_created; vm_paddr_t ucall_mmio_addr; - vm_paddr_t pgd; vm_vaddr_t handlers; uint32_t dirty_ring_size; uint64_t gpa_tag_mask; + /* + * "mmu" is the guest's stage-1, with a short name because the vast + * majority of tests only care about the stage-1 MMU. 
+ */ + struct kvm_mmu mmu; + struct kvm_mmu stage2_mmu; + struct kvm_vm_arch arch; struct kvm_binary_stats stats; diff --git a/tools/testing/selftests/kvm/include/loongarch/kvm_util_arch.h b/tools/testing/selftests/kvm/include/loongarch/kvm_util_arch.h index e43a57d99b56..d5095900e442 100644 --- a/tools/testing/selftests/kvm/include/loongarch/kvm_util_arch.h +++ b/tools/testing/selftests/kvm/include/loongarch/kvm_util_arch.h @@ -2,6 +2,7 @@ #ifndef SELFTEST_KVM_UTIL_ARCH_H #define SELFTEST_KVM_UTIL_ARCH_H +struct kvm_mmu_arch {}; struct kvm_vm_arch {}; #endif // SELFTEST_KVM_UTIL_ARCH_H diff --git a/tools/testing/selftests/kvm/include/riscv/kvm_util_arch.h b/tools/testing/selftests/kvm/include/riscv/kvm_util_arch.h index e43a57d99b56..d5095900e442 100644 --- a/tools/testing/selftests/kvm/include/riscv/kvm_util_arch.h +++ b/tools/testing/selftests/kvm/include/riscv/kvm_util_arch.h @@ -2,6 +2,7 @@ #ifndef SELFTEST_KVM_UTIL_ARCH_H #define SELFTEST_KVM_UTIL_ARCH_H +struct kvm_mmu_arch {}; struct kvm_vm_arch {}; #endif // SELFTEST_KVM_UTIL_ARCH_H diff --git a/tools/testing/selftests/kvm/include/s390/kvm_util_arch.h b/tools/testing/selftests/kvm/include/s390/kvm_util_arch.h index e43a57d99b56..d5095900e442 100644 --- a/tools/testing/selftests/kvm/include/s390/kvm_util_arch.h +++ b/tools/testing/selftests/kvm/include/s390/kvm_util_arch.h @@ -2,6 +2,7 @@ #ifndef SELFTEST_KVM_UTIL_ARCH_H #define SELFTEST_KVM_UTIL_ARCH_H +struct kvm_mmu_arch {}; struct kvm_vm_arch {}; #endif // SELFTEST_KVM_UTIL_ARCH_H diff --git a/tools/testing/selftests/kvm/include/x86/apic.h b/tools/testing/selftests/kvm/include/x86/apic.h index 80fe9f69b38d..e9b9aebaac97 100644 --- a/tools/testing/selftests/kvm/include/x86/apic.h +++ b/tools/testing/selftests/kvm/include/x86/apic.h @@ -28,6 +28,8 @@ #define GET_APIC_ID_FIELD(x) (((x) >> 24) & 0xFF) #define APIC_TASKPRI 0x80 #define APIC_PROCPRI 0xA0 +#define GET_APIC_PRI(x) (((x) & GENMASK(7, 4)) >> 4) +#define SET_APIC_PRI(x, y) (((x) & ~GENMASK(7, 4)) 
| ((y) << 4)) #define APIC_EOI 0xB0 #define APIC_SPIV 0xF0 #define APIC_SPIV_FOCUS_DISABLED (1 << 9) @@ -67,6 +69,7 @@ #define APIC_TMICT 0x380 #define APIC_TMCCT 0x390 #define APIC_TDCR 0x3E0 +#define APIC_SELF_IPI 0x3F0 void apic_disable(void); void xapic_enable(void); diff --git a/tools/testing/selftests/kvm/include/x86/kvm_util_arch.h b/tools/testing/selftests/kvm/include/x86/kvm_util_arch.h index 972bb1c4ab4c..be35d26bb320 100644 --- a/tools/testing/selftests/kvm/include/x86/kvm_util_arch.h +++ b/tools/testing/selftests/kvm/include/x86/kvm_util_arch.h @@ -10,6 +10,28 @@ extern bool is_forced_emulation_enabled; +struct pte_masks { + uint64_t present; + uint64_t writable; + uint64_t user; + uint64_t readable; + uint64_t executable; + uint64_t accessed; + uint64_t dirty; + uint64_t huge; + uint64_t nx; + uint64_t c; + uint64_t s; + + uint64_t always_set; +}; + +struct kvm_mmu_arch { + struct pte_masks pte_masks; +}; + +struct kvm_mmu; + struct kvm_vm_arch { vm_vaddr_t gdt; vm_vaddr_t tss; diff --git a/tools/testing/selftests/kvm/include/x86/processor.h b/tools/testing/selftests/kvm/include/x86/processor.h index 57d62a425109..4ebae4269e68 100644 --- a/tools/testing/selftests/kvm/include/x86/processor.h +++ b/tools/testing/selftests/kvm/include/x86/processor.h @@ -201,6 +201,7 @@ struct kvm_x86_cpu_feature { #define X86_FEATURE_TSCRATEMSR KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 4) #define X86_FEATURE_PAUSEFILTER KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 10) #define X86_FEATURE_PFTHRESHOLD KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 12) +#define X86_FEATURE_V_VMSAVE_VMLOAD KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 15) #define X86_FEATURE_VGIF KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 16) #define X86_FEATURE_IDLE_HLT KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 30) #define X86_FEATURE_SEV KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 1) @@ -362,16 +363,6 @@ static inline unsigned int x86_model(unsigned int eax) return ((eax >> 12) & 0xf0) | ((eax >> 4) & 0x0f); } -/* Page table bitfield
declarations */ -#define PTE_PRESENT_MASK BIT_ULL(0) -#define PTE_WRITABLE_MASK BIT_ULL(1) -#define PTE_USER_MASK BIT_ULL(2) -#define PTE_ACCESSED_MASK BIT_ULL(5) -#define PTE_DIRTY_MASK BIT_ULL(6) -#define PTE_LARGE_MASK BIT_ULL(7) -#define PTE_GLOBAL_MASK BIT_ULL(8) -#define PTE_NX_MASK BIT_ULL(63) - #define PHYSICAL_PAGE_MASK GENMASK_ULL(51, 12) #define PAGE_SHIFT 12 @@ -436,8 +427,10 @@ struct kvm_x86_state { static inline uint64_t get_desc64_base(const struct desc64 *desc) { - return ((uint64_t)desc->base3 << 32) | - (desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24)); + return (uint64_t)desc->base3 << 32 | + (uint64_t)desc->base2 << 24 | + (uint64_t)desc->base1 << 16 | + (uint64_t)desc->base0; } static inline uint64_t rdtsc(void) @@ -1367,9 +1360,7 @@ static inline bool kvm_is_ignore_msrs(void) return get_kvm_param_bool("ignore_msrs"); } -uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr, - int *level); -uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr); +uint64_t *vm_get_pte(struct kvm_vm *vm, uint64_t vaddr); uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2, uint64_t a3); @@ -1451,10 +1442,52 @@ enum pg_level { #define PG_SIZE_2M PG_LEVEL_SIZE(PG_LEVEL_2M) #define PG_SIZE_1G PG_LEVEL_SIZE(PG_LEVEL_1G) -void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level); +#define PTE_PRESENT_MASK(mmu) ((mmu)->arch.pte_masks.present) +#define PTE_WRITABLE_MASK(mmu) ((mmu)->arch.pte_masks.writable) +#define PTE_USER_MASK(mmu) ((mmu)->arch.pte_masks.user) +#define PTE_READABLE_MASK(mmu) ((mmu)->arch.pte_masks.readable) +#define PTE_EXECUTABLE_MASK(mmu) ((mmu)->arch.pte_masks.executable) +#define PTE_ACCESSED_MASK(mmu) ((mmu)->arch.pte_masks.accessed) +#define PTE_DIRTY_MASK(mmu) ((mmu)->arch.pte_masks.dirty) +#define PTE_HUGE_MASK(mmu) ((mmu)->arch.pte_masks.huge) +#define PTE_NX_MASK(mmu) ((mmu)->arch.pte_masks.nx) +#define PTE_C_BIT_MASK(mmu) ((mmu)->arch.pte_masks.c) 
+#define PTE_S_BIT_MASK(mmu) ((mmu)->arch.pte_masks.s) +#define PTE_ALWAYS_SET_MASK(mmu) ((mmu)->arch.pte_masks.always_set) + +/* + * For PTEs without a PRESENT bit (i.e. EPT entries), treat the PTE as present + * if it's executable or readable, as EPT supports execute-only PTEs, but not + * write-only PTEs. + */ +#define is_present_pte(mmu, pte) \ + (PTE_PRESENT_MASK(mmu) ? \ + !!(*(pte) & PTE_PRESENT_MASK(mmu)) : \ + !!(*(pte) & (PTE_READABLE_MASK(mmu) | PTE_EXECUTABLE_MASK(mmu)))) +#define is_executable_pte(mmu, pte) \ + ((*(pte) & (PTE_EXECUTABLE_MASK(mmu) | PTE_NX_MASK(mmu))) == PTE_EXECUTABLE_MASK(mmu)) +#define is_writable_pte(mmu, pte) (!!(*(pte) & PTE_WRITABLE_MASK(mmu))) +#define is_user_pte(mmu, pte) (!!(*(pte) & PTE_USER_MASK(mmu))) +#define is_accessed_pte(mmu, pte) (!!(*(pte) & PTE_ACCESSED_MASK(mmu))) +#define is_dirty_pte(mmu, pte) (!!(*(pte) & PTE_DIRTY_MASK(mmu))) +#define is_huge_pte(mmu, pte) (!!(*(pte) & PTE_HUGE_MASK(mmu))) +#define is_nx_pte(mmu, pte) (!is_executable_pte(mmu, pte)) + +void tdp_mmu_init(struct kvm_vm *vm, int pgtable_levels, + struct pte_masks *pte_masks); + +void __virt_pg_map(struct kvm_vm *vm, struct kvm_mmu *mmu, uint64_t vaddr, + uint64_t paddr, int level); void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, uint64_t nr_bytes, int level); +void vm_enable_tdp(struct kvm_vm *vm); +bool kvm_cpu_has_tdp(void); +void tdp_map(struct kvm_vm *vm, uint64_t nested_paddr, uint64_t paddr, uint64_t size); +void tdp_identity_map_default_memslots(struct kvm_vm *vm); +void tdp_identity_map_1g(struct kvm_vm *vm, uint64_t addr, uint64_t size); +uint64_t *tdp_get_pte(struct kvm_vm *vm, uint64_t l2_gpa); + /* * Basic CPU control in CR0 */ diff --git a/tools/testing/selftests/kvm/include/x86/svm_util.h b/tools/testing/selftests/kvm/include/x86/svm_util.h index b74c6dcddcbd..5d7c42534bc4 100644 --- a/tools/testing/selftests/kvm/include/x86/svm_util.h +++ b/tools/testing/selftests/kvm/include/x86/svm_util.h @@ -27,6 +27,9 @@ 
struct svm_test_data { void *msr; /* gva */ void *msr_hva; uint64_t msr_gpa; + + /* NPT */ + uint64_t ncr3_gpa; }; static inline void vmmcall(void) @@ -57,6 +60,12 @@ struct svm_test_data *vcpu_alloc_svm(struct kvm_vm *vm, vm_vaddr_t *p_svm_gva); void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_rsp); void run_guest(struct vmcb *vmcb, uint64_t vmcb_gpa); +static inline bool kvm_cpu_has_npt(void) +{ + return kvm_cpu_has(X86_FEATURE_NPT); +} +void vm_enable_npt(struct kvm_vm *vm); + int open_sev_dev_path_or_exit(void); #endif /* SELFTEST_KVM_SVM_UTILS_H */ diff --git a/tools/testing/selftests/kvm/include/x86/vmx.h b/tools/testing/selftests/kvm/include/x86/vmx.h index 96e2b4c630a9..92b918700d24 100644 --- a/tools/testing/selftests/kvm/include/x86/vmx.h +++ b/tools/testing/selftests/kvm/include/x86/vmx.h @@ -520,13 +520,11 @@ struct vmx_pages { uint64_t vmwrite_gpa; void *vmwrite; - void *eptp_hva; - uint64_t eptp_gpa; - void *eptp; - void *apic_access_hva; uint64_t apic_access_gpa; void *apic_access; + + uint64_t eptp_gpa; }; union vmx_basic { @@ -559,16 +557,8 @@ bool load_vmcs(struct vmx_pages *vmx); bool ept_1g_pages_supported(void); -void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm, - uint64_t nested_paddr, uint64_t paddr); -void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm, - uint64_t nested_paddr, uint64_t paddr, uint64_t size); -void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm, - uint32_t memslot); -void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm, - uint64_t addr, uint64_t size); bool kvm_cpu_has_ept(void); -void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm); +void vm_enable_ept(struct kvm_vm *vm); void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm); #endif /* SELFTEST_KVM_VMX_H */ diff --git a/tools/testing/selftests/kvm/lib/arm64/processor.c b/tools/testing/selftests/kvm/lib/arm64/processor.c index 1605dc740d1e..43ea40edc533 100644 --- 
a/tools/testing/selftests/kvm/lib/arm64/processor.c +++ b/tools/testing/selftests/kvm/lib/arm64/processor.c @@ -23,7 +23,7 @@ static vm_vaddr_t exception_handlers; static uint64_t pgd_index(struct kvm_vm *vm, vm_vaddr_t gva) { - unsigned int shift = (vm->pgtable_levels - 1) * (vm->page_shift - 3) + vm->page_shift; + unsigned int shift = (vm->mmu.pgtable_levels - 1) * (vm->page_shift - 3) + vm->page_shift; uint64_t mask = (1UL << (vm->va_bits - shift)) - 1; return (gva >> shift) & mask; @@ -34,7 +34,7 @@ static uint64_t pud_index(struct kvm_vm *vm, vm_vaddr_t gva) unsigned int shift = 2 * (vm->page_shift - 3) + vm->page_shift; uint64_t mask = (1UL << (vm->page_shift - 3)) - 1; - TEST_ASSERT(vm->pgtable_levels == 4, + TEST_ASSERT(vm->mmu.pgtable_levels == 4, "Mode %d does not have 4 page table levels", vm->mode); return (gva >> shift) & mask; @@ -45,7 +45,7 @@ static uint64_t pmd_index(struct kvm_vm *vm, vm_vaddr_t gva) unsigned int shift = (vm->page_shift - 3) + vm->page_shift; uint64_t mask = (1UL << (vm->page_shift - 3)) - 1; - TEST_ASSERT(vm->pgtable_levels >= 3, + TEST_ASSERT(vm->mmu.pgtable_levels >= 3, "Mode %d does not have >= 3 page table levels", vm->mode); return (gva >> shift) & mask; @@ -99,7 +99,7 @@ static uint64_t pte_addr(struct kvm_vm *vm, uint64_t pte) static uint64_t ptrs_per_pgd(struct kvm_vm *vm) { - unsigned int shift = (vm->pgtable_levels - 1) * (vm->page_shift - 3) + vm->page_shift; + unsigned int shift = (vm->mmu.pgtable_levels - 1) * (vm->page_shift - 3) + vm->page_shift; return 1 << (vm->va_bits - shift); } @@ -112,13 +112,13 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm) { size_t nr_pages = vm_page_align(vm, ptrs_per_pgd(vm) * 8) / vm->page_size; - if (vm->pgd_created) + if (vm->mmu.pgd_created) return; - vm->pgd = vm_phy_pages_alloc(vm, nr_pages, - KVM_GUEST_PAGE_TABLE_MIN_PADDR, - vm->memslots[MEM_REGION_PT]); - vm->pgd_created = true; + vm->mmu.pgd = vm_phy_pages_alloc(vm, nr_pages, + KVM_GUEST_PAGE_TABLE_MIN_PADDR, + 
vm->memslots[MEM_REGION_PT]); + vm->mmu.pgd_created = true; } static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, @@ -142,12 +142,12 @@ static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", paddr, vm->max_gfn, vm->page_size); - ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, vaddr) * 8; + ptep = addr_gpa2hva(vm, vm->mmu.pgd) + pgd_index(vm, vaddr) * 8; if (!*ptep) *ptep = addr_pte(vm, vm_alloc_page_table(vm), PGD_TYPE_TABLE | PTE_VALID); - switch (vm->pgtable_levels) { + switch (vm->mmu.pgtable_levels) { case 4: ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, vaddr) * 8; if (!*ptep) @@ -185,16 +185,16 @@ uint64_t *virt_get_pte_hva_at_level(struct kvm_vm *vm, vm_vaddr_t gva, int level { uint64_t *ptep; - if (!vm->pgd_created) + if (!vm->mmu.pgd_created) goto unmapped_gva; - ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, gva) * 8; + ptep = addr_gpa2hva(vm, vm->mmu.pgd) + pgd_index(vm, gva) * 8; if (!ptep) goto unmapped_gva; if (level == 0) return ptep; - switch (vm->pgtable_levels) { + switch (vm->mmu.pgtable_levels) { case 4: ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, gva) * 8; if (!ptep) @@ -258,13 +258,13 @@ static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, uint64_t p void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) { - int level = 4 - (vm->pgtable_levels - 1); + int level = 4 - (vm->mmu.pgtable_levels - 1); uint64_t pgd, *ptep; - if (!vm->pgd_created) + if (!vm->mmu.pgd_created) return; - for (pgd = vm->pgd; pgd < vm->pgd + ptrs_per_pgd(vm) * 8; pgd += 8) { + for (pgd = vm->mmu.pgd; pgd < vm->mmu.pgd + ptrs_per_pgd(vm) * 8; pgd += 8) { ptep = addr_gpa2hva(vm, pgd); if (!*ptep) continue; @@ -345,7 +345,7 @@ void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init) TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode); } - ttbr0_el1 = vm->pgd & GENMASK(47, vm->page_shift); + 
ttbr0_el1 = vm->mmu.pgd & GENMASK(47, vm->page_shift); /* Configure output size */ switch (vm->mode) { @@ -353,7 +353,7 @@ void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init) case VM_MODE_P52V48_16K: case VM_MODE_P52V48_64K: tcr_el1 |= TCR_IPS_52_BITS; - ttbr0_el1 |= FIELD_GET(GENMASK(51, 48), vm->pgd) << 2; + ttbr0_el1 |= FIELD_GET(GENMASK(51, 48), vm->mmu.pgd) << 2; break; case VM_MODE_P48V48_4K: case VM_MODE_P48V48_16K: diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index fab6b62d7810..a37d17984ac5 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -281,34 +281,34 @@ struct kvm_vm *____vm_create(struct vm_shape shape) /* Setup mode specific traits. */ switch (vm->mode) { case VM_MODE_P52V48_4K: - vm->pgtable_levels = 4; + vm->mmu.pgtable_levels = 4; break; case VM_MODE_P52V48_64K: - vm->pgtable_levels = 3; + vm->mmu.pgtable_levels = 3; break; case VM_MODE_P48V48_4K: - vm->pgtable_levels = 4; + vm->mmu.pgtable_levels = 4; break; case VM_MODE_P48V48_64K: - vm->pgtable_levels = 3; + vm->mmu.pgtable_levels = 3; break; case VM_MODE_P40V48_4K: case VM_MODE_P36V48_4K: - vm->pgtable_levels = 4; + vm->mmu.pgtable_levels = 4; break; case VM_MODE_P40V48_64K: case VM_MODE_P36V48_64K: - vm->pgtable_levels = 3; + vm->mmu.pgtable_levels = 3; break; case VM_MODE_P52V48_16K: case VM_MODE_P48V48_16K: case VM_MODE_P40V48_16K: case VM_MODE_P36V48_16K: - vm->pgtable_levels = 4; + vm->mmu.pgtable_levels = 4; break; case VM_MODE_P47V47_16K: case VM_MODE_P36V47_16K: - vm->pgtable_levels = 3; + vm->mmu.pgtable_levels = 3; break; case VM_MODE_PXXVYY_4K: #ifdef __x86_64__ @@ -321,22 +321,22 @@ struct kvm_vm *____vm_create(struct vm_shape shape) vm->va_bits); if (vm->va_bits == 57) { - vm->pgtable_levels = 5; + vm->mmu.pgtable_levels = 5; } else { TEST_ASSERT(vm->va_bits == 48, "Unexpected guest virtual address width: %d", vm->va_bits); - vm->pgtable_levels 
= 4; + vm->mmu.pgtable_levels = 4; } #else TEST_FAIL("VM_MODE_PXXVYY_4K not supported on non-x86 platforms"); #endif break; case VM_MODE_P47V64_4K: - vm->pgtable_levels = 5; + vm->mmu.pgtable_levels = 5; break; case VM_MODE_P44V64_4K: - vm->pgtable_levels = 5; + vm->mmu.pgtable_levels = 5; break; default: TEST_FAIL("Unknown guest mode: 0x%x", vm->mode); @@ -1956,8 +1956,8 @@ void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) fprintf(stream, "%*sMapped Virtual Pages:\n", indent, ""); sparsebit_dump(stream, vm->vpages_mapped, indent + 2); fprintf(stream, "%*spgd_created: %u\n", indent, "", - vm->pgd_created); - if (vm->pgd_created) { + vm->mmu.pgd_created); + if (vm->mmu.pgd_created) { fprintf(stream, "%*sVirtual Translation Tables:\n", indent + 2, ""); virt_dump(stream, vm, indent + 4); diff --git a/tools/testing/selftests/kvm/lib/loongarch/processor.c b/tools/testing/selftests/kvm/lib/loongarch/processor.c index 07c103369ddb..17aa55a2047a 100644 --- a/tools/testing/selftests/kvm/lib/loongarch/processor.c +++ b/tools/testing/selftests/kvm/lib/loongarch/processor.c @@ -50,11 +50,11 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm) int i; vm_paddr_t child, table; - if (vm->pgd_created) + if (vm->mmu.pgd_created) return; child = table = 0; - for (i = 0; i < vm->pgtable_levels; i++) { + for (i = 0; i < vm->mmu.pgtable_levels; i++) { invalid_pgtable[i] = child; table = vm_phy_page_alloc(vm, LOONGARCH_PAGE_TABLE_PHYS_MIN, vm->memslots[MEM_REGION_PT]); @@ -62,8 +62,8 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm) virt_set_pgtable(vm, table, child); child = table; } - vm->pgd = table; - vm->pgd_created = true; + vm->mmu.pgd = table; + vm->mmu.pgd_created = true; } static int virt_pte_none(uint64_t *ptep, int level) @@ -77,11 +77,11 @@ static uint64_t *virt_populate_pte(struct kvm_vm *vm, vm_vaddr_t gva, int alloc) uint64_t *ptep; vm_paddr_t child; - if (!vm->pgd_created) + if (!vm->mmu.pgd_created) goto unmapped_gva; - child = vm->pgd; - level = vm->pgtable_levels - 
1; + child = vm->mmu.pgd; + level = vm->mmu.pgtable_levels - 1; while (level > 0) { ptep = addr_gpa2hva(vm, child) + virt_pte_index(vm, gva, level) * 8; if (virt_pte_none(ptep, level)) { @@ -161,11 +161,11 @@ void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) { int level; - if (!vm->pgd_created) + if (!vm->mmu.pgd_created) return; - level = vm->pgtable_levels - 1; - pte_dump(stream, vm, indent, vm->pgd, level); + level = vm->mmu.pgtable_levels - 1; + pte_dump(stream, vm, indent, vm->mmu.pgd, level); } void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent) @@ -297,7 +297,7 @@ static void loongarch_vcpu_setup(struct kvm_vcpu *vcpu) width = vm->page_shift - 3; - switch (vm->pgtable_levels) { + switch (vm->mmu.pgtable_levels) { case 4: /* pud page shift and width */ val = (vm->page_shift + width * 2) << 20 | (width << 25); @@ -309,15 +309,15 @@ static void loongarch_vcpu_setup(struct kvm_vcpu *vcpu) val |= vm->page_shift | width << 5; break; default: - TEST_FAIL("Got %u page table levels, expected 3 or 4", vm->pgtable_levels); + TEST_FAIL("Got %u page table levels, expected 3 or 4", vm->mmu.pgtable_levels); } loongarch_set_csr(vcpu, LOONGARCH_CSR_PWCTL0, val); /* PGD page shift and width */ - val = (vm->page_shift + width * (vm->pgtable_levels - 1)) | width << 6; + val = (vm->page_shift + width * (vm->mmu.pgtable_levels - 1)) | width << 6; loongarch_set_csr(vcpu, LOONGARCH_CSR_PWCTL1, val); - loongarch_set_csr(vcpu, LOONGARCH_CSR_PGDL, vm->pgd); + loongarch_set_csr(vcpu, LOONGARCH_CSR_PGDL, vm->mmu.pgd); /* * Refill exception runs on real mode diff --git a/tools/testing/selftests/kvm/lib/riscv/processor.c b/tools/testing/selftests/kvm/lib/riscv/processor.c index 401245fe31db..7663bbabcf1a 100644 --- a/tools/testing/selftests/kvm/lib/riscv/processor.c +++ b/tools/testing/selftests/kvm/lib/riscv/processor.c @@ -55,7 +55,7 @@ static uint64_t pte_index(struct kvm_vm *vm, vm_vaddr_t gva, int level) { TEST_ASSERT(level > -1, "Negative page 
table level (%d) not possible", level); - TEST_ASSERT(level < vm->pgtable_levels, + TEST_ASSERT(level < vm->mmu.pgtable_levels, "Invalid page table level (%d)", level); return (gva & pte_index_mask[level]) >> pte_index_shift[level]; @@ -65,19 +65,19 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm) { size_t nr_pages = vm_page_align(vm, ptrs_per_pte(vm) * 8) / vm->page_size; - if (vm->pgd_created) + if (vm->mmu.pgd_created) return; - vm->pgd = vm_phy_pages_alloc(vm, nr_pages, - KVM_GUEST_PAGE_TABLE_MIN_PADDR, - vm->memslots[MEM_REGION_PT]); - vm->pgd_created = true; + vm->mmu.pgd = vm_phy_pages_alloc(vm, nr_pages, + KVM_GUEST_PAGE_TABLE_MIN_PADDR, + vm->memslots[MEM_REGION_PT]); + vm->mmu.pgd_created = true; } void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) { uint64_t *ptep, next_ppn; - int level = vm->pgtable_levels - 1; + int level = vm->mmu.pgtable_levels - 1; TEST_ASSERT((vaddr % vm->page_size) == 0, "Virtual address not on page boundary,\n" @@ -93,7 +93,7 @@ void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", paddr, vm->max_gfn, vm->page_size); - ptep = addr_gpa2hva(vm, vm->pgd) + pte_index(vm, vaddr, level) * 8; + ptep = addr_gpa2hva(vm, vm->mmu.pgd) + pte_index(vm, vaddr, level) * 8; if (!*ptep) { next_ppn = vm_alloc_page_table(vm) >> PGTBL_PAGE_SIZE_SHIFT; *ptep = (next_ppn << PGTBL_PTE_ADDR_SHIFT) | @@ -121,12 +121,12 @@ void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) { uint64_t *ptep; - int level = vm->pgtable_levels - 1; + int level = vm->mmu.pgtable_levels - 1; - if (!vm->pgd_created) + if (!vm->mmu.pgd_created) goto unmapped_gva; - ptep = addr_gpa2hva(vm, vm->pgd) + pte_index(vm, gva, level) * 8; + ptep = addr_gpa2hva(vm, vm->mmu.pgd) + pte_index(vm, gva, level) * 8; if (!ptep) goto unmapped_gva; level--; @@ -171,13 +171,14 @@ static void pte_dump(FILE *stream, struct 
kvm_vm *vm, uint8_t indent, void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) { - int level = vm->pgtable_levels - 1; + struct kvm_mmu *mmu = &vm->mmu; + int level = mmu->pgtable_levels - 1; uint64_t pgd, *ptep; - if (!vm->pgd_created) + if (!mmu->pgd_created) return; - for (pgd = vm->pgd; pgd < vm->pgd + ptrs_per_pte(vm) * 8; pgd += 8) { + for (pgd = mmu->pgd; pgd < mmu->pgd + ptrs_per_pte(vm) * 8; pgd += 8) { ptep = addr_gpa2hva(vm, pgd); if (!*ptep) continue; @@ -206,7 +207,7 @@ void riscv_vcpu_mmu_setup(struct kvm_vcpu *vcpu) TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode); } - satp = (vm->pgd >> PGTBL_PAGE_SIZE_SHIFT) & SATP_PPN; + satp = (vm->mmu.pgd >> PGTBL_PAGE_SIZE_SHIFT) & SATP_PPN; satp |= SATP_MODE_48; vcpu_set_reg(vcpu, RISCV_GENERAL_CSR_REG(satp), satp); diff --git a/tools/testing/selftests/kvm/lib/s390/processor.c b/tools/testing/selftests/kvm/lib/s390/processor.c index 8ceeb17c819a..6a9a660413a7 100644 --- a/tools/testing/selftests/kvm/lib/s390/processor.c +++ b/tools/testing/selftests/kvm/lib/s390/processor.c @@ -17,7 +17,7 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm) TEST_ASSERT(vm->page_size == PAGE_SIZE, "Unsupported page size: 0x%x", vm->page_size); - if (vm->pgd_created) + if (vm->mmu.pgd_created) return; paddr = vm_phy_pages_alloc(vm, PAGES_PER_REGION, @@ -25,8 +25,8 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm) vm->memslots[MEM_REGION_PT]); memset(addr_gpa2hva(vm, paddr), 0xff, PAGES_PER_REGION * vm->page_size); - vm->pgd = paddr; - vm->pgd_created = true; + vm->mmu.pgd = paddr; + vm->mmu.pgd_created = true; } /* @@ -70,7 +70,7 @@ void virt_arch_pg_map(struct kvm_vm *vm, uint64_t gva, uint64_t gpa) gva, vm->max_gfn, vm->page_size); /* Walk through region and segment tables */ - entry = addr_gpa2hva(vm, vm->pgd); + entry = addr_gpa2hva(vm, vm->mmu.pgd); for (ri = 1; ri <= 4; ri++) { idx = (gva >> (64 - 11 * ri)) & 0x7ffu; if (entry[idx] & REGION_ENTRY_INVALID) @@ -94,7 +94,7 @@ vm_paddr_t addr_arch_gva2gpa(struct 
kvm_vm *vm, vm_vaddr_t gva) TEST_ASSERT(vm->page_size == PAGE_SIZE, "Unsupported page size: 0x%x", vm->page_size); - entry = addr_gpa2hva(vm, vm->pgd); + entry = addr_gpa2hva(vm, vm->mmu.pgd); for (ri = 1; ri <= 4; ri++) { idx = (gva >> (64 - 11 * ri)) & 0x7ffu; TEST_ASSERT(!(entry[idx] & REGION_ENTRY_INVALID), @@ -149,10 +149,10 @@ static void virt_dump_region(FILE *stream, struct kvm_vm *vm, uint8_t indent, void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) { - if (!vm->pgd_created) + if (!vm->mmu.pgd_created) return; - virt_dump_region(stream, vm, indent, vm->pgd); + virt_dump_region(stream, vm, indent, vm->mmu.pgd); } void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code) @@ -184,7 +184,7 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) vcpu_sregs_get(vcpu, &sregs); sregs.crs[0] |= 0x00040000; /* Enable floating point regs */ - sregs.crs[1] = vm->pgd | 0xf; /* Primary region table */ + sregs.crs[1] = vm->mmu.pgd | 0xf; /* Primary region table */ vcpu_sregs_set(vcpu, &sregs); vcpu->run->psw_mask = 0x0400000180000000ULL; /* DAT enabled + 64 bit mode */ diff --git a/tools/testing/selftests/kvm/lib/x86/memstress.c b/tools/testing/selftests/kvm/lib/x86/memstress.c index 0b1f288ad556..f53414ba7103 100644 --- a/tools/testing/selftests/kvm/lib/x86/memstress.c +++ b/tools/testing/selftests/kvm/lib/x86/memstress.c @@ -13,6 +13,7 @@ #include "kvm_util.h" #include "memstress.h" #include "processor.h" +#include "svm_util.h" #include "vmx.h" void memstress_l2_guest_code(uint64_t vcpu_id) @@ -29,9 +30,10 @@ __asm__( " ud2;" ); -static void memstress_l1_guest_code(struct vmx_pages *vmx, uint64_t vcpu_id) -{ #define L2_GUEST_STACK_SIZE 64 + +static void l1_vmx_code(struct vmx_pages *vmx, uint64_t vcpu_id) +{ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; unsigned long *rsp; @@ -45,10 +47,34 @@ static void memstress_l1_guest_code(struct vmx_pages *vmx, uint64_t vcpu_id) prepare_vmcs(vmx, memstress_l2_guest_entry, 
rsp); GUEST_ASSERT(!vmlaunch()); - GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_VMCALL); GUEST_DONE(); } +static void l1_svm_code(struct svm_test_data *svm, uint64_t vcpu_id) +{ + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + unsigned long *rsp; + + + rsp = &l2_guest_stack[L2_GUEST_STACK_SIZE - 1]; + *rsp = vcpu_id; + generic_svm_setup(svm, memstress_l2_guest_entry, rsp); + + run_guest(svm->vmcb, svm->vmcb_gpa); + GUEST_ASSERT_EQ(svm->vmcb->control.exit_code, SVM_EXIT_VMMCALL); + GUEST_DONE(); +} + + +static void memstress_l1_guest_code(void *data, uint64_t vcpu_id) +{ + if (this_cpu_has(X86_FEATURE_VMX)) + l1_vmx_code(data, vcpu_id); + else + l1_svm_code(data, vcpu_id); +} + uint64_t memstress_nested_pages(int nr_vcpus) { /* @@ -59,46 +85,37 @@ uint64_t memstress_nested_pages(int nr_vcpus) return 513 + 10 * nr_vcpus; } -void memstress_setup_ept(struct vmx_pages *vmx, struct kvm_vm *vm) +static void memstress_setup_ept_mappings(struct kvm_vm *vm) { uint64_t start, end; - prepare_eptp(vmx, vm); - /* * Identity map the first 4G and the test region with 1G pages so that * KVM can shadow the EPT12 with the maximum huge page size supported * by the backing source. 
*/ - nested_identity_map_1g(vmx, vm, 0, 0x100000000ULL); + tdp_identity_map_1g(vm, 0, 0x100000000ULL); start = align_down(memstress_args.gpa, PG_SIZE_1G); end = align_up(memstress_args.gpa + memstress_args.size, PG_SIZE_1G); - nested_identity_map_1g(vmx, vm, start, end - start); + tdp_identity_map_1g(vm, start, end - start); } void memstress_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vcpus[]) { - struct vmx_pages *vmx, *vmx0 = NULL; struct kvm_regs regs; - vm_vaddr_t vmx_gva; + vm_vaddr_t nested_gva; int vcpu_id; - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); - TEST_REQUIRE(kvm_cpu_has_ept()); + TEST_REQUIRE(kvm_cpu_has_tdp()); + vm_enable_tdp(vm); + memstress_setup_ept_mappings(vm); for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { - vmx = vcpu_alloc_vmx(vm, &vmx_gva); - - if (vcpu_id == 0) { - memstress_setup_ept(vmx, vm); - vmx0 = vmx; - } else { - /* Share the same EPT table across all vCPUs. */ - vmx->eptp = vmx0->eptp; - vmx->eptp_hva = vmx0->eptp_hva; - vmx->eptp_gpa = vmx0->eptp_gpa; - } + if (kvm_cpu_has(X86_FEATURE_VMX)) + vcpu_alloc_vmx(vm, &nested_gva); + else + vcpu_alloc_svm(vm, &nested_gva); /* * Override the vCPU to run memstress_l1_guest_code() which will @@ -107,6 +124,6 @@ void memstress_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vc vcpu_regs_get(vcpus[vcpu_id], &regs); regs.rip = (unsigned long) memstress_l1_guest_code; vcpu_regs_set(vcpus[vcpu_id], &regs); - vcpu_args_set(vcpus[vcpu_id], 2, vmx_gva, vcpu_id); + vcpu_args_set(vcpus[vcpu_id], 2, nested_gva, vcpu_id); } } diff --git a/tools/testing/selftests/kvm/lib/x86/processor.c b/tools/testing/selftests/kvm/lib/x86/processor.c index 36104d27f3d9..fab18e9be66c 100644 --- a/tools/testing/selftests/kvm/lib/x86/processor.c +++ b/tools/testing/selftests/kvm/lib/x86/processor.c @@ -8,7 +8,9 @@ #include "kvm_util.h" #include "pmu.h" #include "processor.h" +#include "svm_util.h" #include "sev.h" +#include "vmx.h" #ifndef NUM_INTERRUPTS #define NUM_INTERRUPTS 256 @@
-156,26 +158,59 @@ bool kvm_is_tdp_enabled(void) return get_kvm_amd_param_bool("npt"); } +static void virt_mmu_init(struct kvm_vm *vm, struct kvm_mmu *mmu, + struct pte_masks *pte_masks) +{ + /* If needed, create the top-level page table. */ + if (!mmu->pgd_created) { + mmu->pgd = vm_alloc_page_table(vm); + mmu->pgd_created = true; + mmu->arch.pte_masks = *pte_masks; + } + + TEST_ASSERT(mmu->pgtable_levels == 4 || mmu->pgtable_levels == 5, + "Selftests MMU only supports 4-level and 5-level paging, not %u-level paging", + mmu->pgtable_levels); +} + void virt_arch_pgd_alloc(struct kvm_vm *vm) { TEST_ASSERT(vm->mode == VM_MODE_PXXVYY_4K, "Unknown or unsupported guest mode: 0x%x", vm->mode); - /* If needed, create the top-level page table. */ - if (!vm->pgd_created) { - vm->pgd = vm_alloc_page_table(vm); - vm->pgd_created = true; - } + struct pte_masks pte_masks = (struct pte_masks){ + .present = BIT_ULL(0), + .writable = BIT_ULL(1), + .user = BIT_ULL(2), + .accessed = BIT_ULL(5), + .dirty = BIT_ULL(6), + .huge = BIT_ULL(7), + .nx = BIT_ULL(63), + .executable = 0, + .c = vm->arch.c_bit, + .s = vm->arch.s_bit, + }; + + virt_mmu_init(vm, &vm->mmu, &pte_masks); +} + +void tdp_mmu_init(struct kvm_vm *vm, int pgtable_levels, + struct pte_masks *pte_masks) +{ + TEST_ASSERT(!vm->stage2_mmu.pgtable_levels, "TDP MMU already initialized"); + + vm->stage2_mmu.pgtable_levels = pgtable_levels; + virt_mmu_init(vm, &vm->stage2_mmu, pte_masks); } -static void *virt_get_pte(struct kvm_vm *vm, uint64_t *parent_pte, - uint64_t vaddr, int level) +static void *virt_get_pte(struct kvm_vm *vm, struct kvm_mmu *mmu, + uint64_t *parent_pte, uint64_t vaddr, int level) { uint64_t pt_gpa = PTE_GET_PA(*parent_pte); uint64_t *page_table = addr_gpa2hva(vm, pt_gpa); int index = (vaddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu; - TEST_ASSERT((*parent_pte & PTE_PRESENT_MASK) || parent_pte == &vm->pgd, + TEST_ASSERT((*parent_pte == mmu->pgd) || is_present_pte(mmu, parent_pte), "Parent PTE (level %d) not PRESENT 
for gva: 0x%08lx", level + 1, vaddr); @@ -183,20 +218,23 @@ static void *virt_get_pte(struct kvm_vm *vm, uint64_t *parent_pte, } static uint64_t *virt_create_upper_pte(struct kvm_vm *vm, + struct kvm_mmu *mmu, uint64_t *parent_pte, uint64_t vaddr, uint64_t paddr, int current_level, int target_level) { - uint64_t *pte = virt_get_pte(vm, parent_pte, vaddr, current_level); + uint64_t *pte = virt_get_pte(vm, mmu, parent_pte, vaddr, current_level); paddr = vm_untag_gpa(vm, paddr); - if (!(*pte & PTE_PRESENT_MASK)) { - *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK; + if (!is_present_pte(mmu, pte)) { + *pte = PTE_PRESENT_MASK(mmu) | PTE_READABLE_MASK(mmu) | + PTE_WRITABLE_MASK(mmu) | PTE_EXECUTABLE_MASK(mmu) | + PTE_ALWAYS_SET_MASK(mmu); if (current_level == target_level) - *pte |= PTE_LARGE_MASK | (paddr & PHYSICAL_PAGE_MASK); + *pte |= PTE_HUGE_MASK(mmu) | (paddr & PHYSICAL_PAGE_MASK); else *pte |= vm_alloc_page_table(vm) & PHYSICAL_PAGE_MASK; } else { @@ -208,17 +246,18 @@ static uint64_t *virt_create_upper_pte(struct kvm_vm *vm, TEST_ASSERT(current_level != target_level, "Cannot create hugepage at level: %u, vaddr: 0x%lx", current_level, vaddr); - TEST_ASSERT(!(*pte & PTE_LARGE_MASK), + TEST_ASSERT(!is_huge_pte(mmu, pte), "Cannot create page table at level: %u, vaddr: 0x%lx", current_level, vaddr); } return pte; } -void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level) +void __virt_pg_map(struct kvm_vm *vm, struct kvm_mmu *mmu, uint64_t vaddr, + uint64_t paddr, int level) { const uint64_t pg_size = PG_LEVEL_SIZE(level); - uint64_t *pte = &vm->pgd; + uint64_t *pte = &mmu->pgd; int current_level; TEST_ASSERT(vm->mode == VM_MODE_PXXVYY_4K, @@ -239,38 +278,43 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level) TEST_ASSERT(vm_untag_gpa(vm, paddr) == paddr, "Unexpected bits in paddr: %lx", paddr); + TEST_ASSERT(!PTE_EXECUTABLE_MASK(mmu) || !PTE_NX_MASK(mmu), + "X and NX bit masks cannot be used simultaneously"); + /* * 
Allocate upper level page tables, if not already present. Return * early if a hugepage was created. */ - for (current_level = vm->pgtable_levels; + for (current_level = mmu->pgtable_levels; current_level > PG_LEVEL_4K; current_level--) { - pte = virt_create_upper_pte(vm, pte, vaddr, paddr, + pte = virt_create_upper_pte(vm, mmu, pte, vaddr, paddr, current_level, level); - if (*pte & PTE_LARGE_MASK) + if (is_huge_pte(mmu, pte)) return; } /* Fill in page table entry. */ - pte = virt_get_pte(vm, pte, vaddr, PG_LEVEL_4K); - TEST_ASSERT(!(*pte & PTE_PRESENT_MASK), + pte = virt_get_pte(vm, mmu, pte, vaddr, PG_LEVEL_4K); + TEST_ASSERT(!is_present_pte(mmu, pte), "PTE already present for 4k page at vaddr: 0x%lx", vaddr); - *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK | (paddr & PHYSICAL_PAGE_MASK); + *pte = PTE_PRESENT_MASK(mmu) | PTE_READABLE_MASK(mmu) | + PTE_WRITABLE_MASK(mmu) | PTE_EXECUTABLE_MASK(mmu) | + PTE_ALWAYS_SET_MASK(mmu) | (paddr & PHYSICAL_PAGE_MASK); /* * Neither SEV nor TDX supports shared page tables, so only the final * leaf PTE needs manually set the C/S-bit. 
*/ if (vm_is_gpa_protected(vm, paddr)) - *pte |= vm->arch.c_bit; + *pte |= PTE_C_BIT_MASK(mmu); else - *pte |= vm->arch.s_bit; + *pte |= PTE_S_BIT_MASK(mmu); } void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) { - __virt_pg_map(vm, vaddr, paddr, PG_LEVEL_4K); + __virt_pg_map(vm, &vm->mmu, vaddr, paddr, PG_LEVEL_4K); } void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, @@ -285,7 +329,7 @@ void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, nr_bytes, pg_size); for (i = 0; i < nr_pages; i++) { - __virt_pg_map(vm, vaddr, paddr, level); + __virt_pg_map(vm, &vm->mmu, vaddr, paddr, level); sparsebit_set_num(vm->vpages_mapped, vaddr >> vm->page_shift, nr_bytes / PAGE_SIZE); @@ -294,9 +338,10 @@ void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, } } -static bool vm_is_target_pte(uint64_t *pte, int *level, int current_level) +static bool vm_is_target_pte(struct kvm_mmu *mmu, uint64_t *pte, + int *level, int current_level) { - if (*pte & PTE_LARGE_MASK) { + if (is_huge_pte(mmu, pte)) { TEST_ASSERT(*level == PG_LEVEL_NONE || *level == current_level, "Unexpected hugepage at level %d", current_level); @@ -306,17 +351,19 @@ static bool vm_is_target_pte(uint64_t *pte, int *level, int current_level) return *level == current_level; } -uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr, - int *level) +static uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, + struct kvm_mmu *mmu, + uint64_t vaddr, + int *level) { - int va_width = 12 + (vm->pgtable_levels) * 9; - uint64_t *pte = &vm->pgd; + int va_width = 12 + (mmu->pgtable_levels) * 9; + uint64_t *pte = &mmu->pgd; int current_level; TEST_ASSERT(!vm->arch.is_pt_protected, "Walking page tables of protected guests is impossible"); - TEST_ASSERT(*level >= PG_LEVEL_NONE && *level <= vm->pgtable_levels, + TEST_ASSERT(*level >= PG_LEVEL_NONE && *level <= mmu->pgtable_levels, "Invalid PG_LEVEL_* '%d'", *level); TEST_ASSERT(vm->mode == 
VM_MODE_PXXVYY_4K, @@ -332,32 +379,40 @@ uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr, (((int64_t)vaddr << (64 - va_width) >> (64 - va_width))), "Canonical check failed. The virtual address is invalid."); - for (current_level = vm->pgtable_levels; + for (current_level = mmu->pgtable_levels; current_level > PG_LEVEL_4K; current_level--) { - pte = virt_get_pte(vm, pte, vaddr, current_level); - if (vm_is_target_pte(pte, level, current_level)) + pte = virt_get_pte(vm, mmu, pte, vaddr, current_level); + if (vm_is_target_pte(mmu, pte, level, current_level)) return pte; } - return virt_get_pte(vm, pte, vaddr, PG_LEVEL_4K); + return virt_get_pte(vm, mmu, pte, vaddr, PG_LEVEL_4K); +} + +uint64_t *tdp_get_pte(struct kvm_vm *vm, uint64_t l2_gpa) +{ + int level = PG_LEVEL_4K; + + return __vm_get_page_table_entry(vm, &vm->stage2_mmu, l2_gpa, &level); } -uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr) +uint64_t *vm_get_pte(struct kvm_vm *vm, uint64_t vaddr) { int level = PG_LEVEL_4K; - return __vm_get_page_table_entry(vm, vaddr, &level); + return __vm_get_page_table_entry(vm, &vm->mmu, vaddr, &level); } void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) { + struct kvm_mmu *mmu = &vm->mmu; uint64_t *pml4e, *pml4e_start; uint64_t *pdpe, *pdpe_start; uint64_t *pde, *pde_start; uint64_t *pte, *pte_start; - if (!vm->pgd_created) + if (!mmu->pgd_created) return; fprintf(stream, "%*s " @@ -365,47 +420,47 @@ void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) fprintf(stream, "%*s index hvaddr gpaddr " "addr w exec dirty\n", indent, ""); - pml4e_start = (uint64_t *) addr_gpa2hva(vm, vm->pgd); + pml4e_start = (uint64_t *) addr_gpa2hva(vm, mmu->pgd); for (uint16_t n1 = 0; n1 <= 0x1ffu; n1++) { pml4e = &pml4e_start[n1]; - if (!(*pml4e & PTE_PRESENT_MASK)) + if (!is_present_pte(mmu, pml4e)) continue; fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10llx %u " " %u\n", indent, "", pml4e - pml4e_start, pml4e, 
addr_hva2gpa(vm, pml4e), PTE_GET_PFN(*pml4e), - !!(*pml4e & PTE_WRITABLE_MASK), !!(*pml4e & PTE_NX_MASK)); + is_writable_pte(mmu, pml4e), is_nx_pte(mmu, pml4e)); pdpe_start = addr_gpa2hva(vm, *pml4e & PHYSICAL_PAGE_MASK); for (uint16_t n2 = 0; n2 <= 0x1ffu; n2++) { pdpe = &pdpe_start[n2]; - if (!(*pdpe & PTE_PRESENT_MASK)) + if (!is_present_pte(mmu, pdpe)) continue; fprintf(stream, "%*spdpe 0x%-3zx %p 0x%-12lx 0x%-10llx " "%u %u\n", indent, "", pdpe - pdpe_start, pdpe, addr_hva2gpa(vm, pdpe), - PTE_GET_PFN(*pdpe), !!(*pdpe & PTE_WRITABLE_MASK), - !!(*pdpe & PTE_NX_MASK)); + PTE_GET_PFN(*pdpe), is_writable_pte(mmu, pdpe), + is_nx_pte(mmu, pdpe)); pde_start = addr_gpa2hva(vm, *pdpe & PHYSICAL_PAGE_MASK); for (uint16_t n3 = 0; n3 <= 0x1ffu; n3++) { pde = &pde_start[n3]; - if (!(*pde & PTE_PRESENT_MASK)) + if (!is_present_pte(mmu, pde)) continue; fprintf(stream, "%*spde 0x%-3zx %p " "0x%-12lx 0x%-10llx %u %u\n", indent, "", pde - pde_start, pde, addr_hva2gpa(vm, pde), - PTE_GET_PFN(*pde), !!(*pde & PTE_WRITABLE_MASK), - !!(*pde & PTE_NX_MASK)); + PTE_GET_PFN(*pde), is_writable_pte(mmu, pde), + is_nx_pte(mmu, pde)); pte_start = addr_gpa2hva(vm, *pde & PHYSICAL_PAGE_MASK); for (uint16_t n4 = 0; n4 <= 0x1ffu; n4++) { pte = &pte_start[n4]; - if (!(*pte & PTE_PRESENT_MASK)) + if (!is_present_pte(mmu, pte)) continue; fprintf(stream, "%*spte 0x%-3zx %p " "0x%-12lx 0x%-10llx %u %u " @@ -414,9 +469,9 @@ void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) pte - pte_start, pte, addr_hva2gpa(vm, pte), PTE_GET_PFN(*pte), - !!(*pte & PTE_WRITABLE_MASK), - !!(*pte & PTE_NX_MASK), - !!(*pte & PTE_DIRTY_MASK), + is_writable_pte(mmu, pte), + is_nx_pte(mmu, pte), + is_dirty_pte(mmu, pte), ((uint64_t) n1 << 27) | ((uint64_t) n2 << 18) | ((uint64_t) n3 << 9) @@ -427,6 +482,72 @@ void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) } } +void vm_enable_tdp(struct kvm_vm *vm) +{ + if (kvm_cpu_has(X86_FEATURE_VMX)) + vm_enable_ept(vm); + else + 
vm_enable_npt(vm); +} + +bool kvm_cpu_has_tdp(void) +{ + return kvm_cpu_has_ept() || kvm_cpu_has_npt(); +} + +void __tdp_map(struct kvm_vm *vm, uint64_t nested_paddr, uint64_t paddr, + uint64_t size, int level) +{ + size_t page_size = PG_LEVEL_SIZE(level); + size_t npages = size / page_size; + + TEST_ASSERT(nested_paddr + size > nested_paddr, "Vaddr overflow"); + TEST_ASSERT(paddr + size > paddr, "Paddr overflow"); + + while (npages--) { + __virt_pg_map(vm, &vm->stage2_mmu, nested_paddr, paddr, level); + nested_paddr += page_size; + paddr += page_size; + } +} + +void tdp_map(struct kvm_vm *vm, uint64_t nested_paddr, uint64_t paddr, + uint64_t size) +{ + __tdp_map(vm, nested_paddr, paddr, size, PG_LEVEL_4K); +} + +/* Prepare an identity extended page table that maps all the + * physical pages in VM. + */ +void tdp_identity_map_default_memslots(struct kvm_vm *vm) +{ + uint32_t s, memslot = 0; + sparsebit_idx_t i, last; + struct userspace_mem_region *region = memslot2region(vm, memslot); + + /* Only memslot 0 is mapped here, ensure it's the only one being used */ + for (s = 0; s < NR_MEM_REGIONS; s++) + TEST_ASSERT_EQ(vm->memslots[s], 0); + + i = (region->region.guest_phys_addr >> vm->page_shift) - 1; + last = i + (region->region.memory_size >> vm->page_shift); + for (;;) { + i = sparsebit_next_clear(region->unused_phy_pages, i); + if (i > last) + break; + + tdp_map(vm, (uint64_t)i << vm->page_shift, + (uint64_t)i << vm->page_shift, 1 << vm->page_shift); + } +} + +/* Identity map a region with 1GiB Pages. 
*/ +void tdp_identity_map_1g(struct kvm_vm *vm, uint64_t addr, uint64_t size) +{ + __tdp_map(vm, addr, addr, size, PG_LEVEL_1G); +} + /* * Set Unusable Segment * @@ -497,9 +618,9 @@ static void kvm_seg_set_kernel_data_64bit(struct kvm_segment *segp) vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) { int level = PG_LEVEL_NONE; - uint64_t *pte = __vm_get_page_table_entry(vm, gva, &level); + uint64_t *pte = __vm_get_page_table_entry(vm, &vm->mmu, gva, &level); - TEST_ASSERT(*pte & PTE_PRESENT_MASK, + TEST_ASSERT(is_present_pte(&vm->mmu, pte), "Leaf PTE not PRESENT for gva: 0x%08lx", gva); /* @@ -538,7 +659,7 @@ static void vcpu_init_sregs(struct kvm_vm *vm, struct kvm_vcpu *vcpu) sregs.cr4 |= X86_CR4_PAE | X86_CR4_OSFXSR; if (kvm_cpu_has(X86_FEATURE_XSAVE)) sregs.cr4 |= X86_CR4_OSXSAVE; - if (vm->pgtable_levels == 5) + if (vm->mmu.pgtable_levels == 5) sregs.cr4 |= X86_CR4_LA57; sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX); @@ -549,7 +670,7 @@ static void vcpu_init_sregs(struct kvm_vm *vm, struct kvm_vcpu *vcpu) kvm_seg_set_kernel_data_64bit(&sregs.gs); kvm_seg_set_tss_64bit(vm->arch.tss, &sregs.tr); - sregs.cr3 = vm->pgd; + sregs.cr3 = vm->mmu.pgd; vcpu_sregs_set(vcpu, &sregs); } diff --git a/tools/testing/selftests/kvm/lib/x86/svm.c b/tools/testing/selftests/kvm/lib/x86/svm.c index d239c2097391..2e5c480c9afd 100644 --- a/tools/testing/selftests/kvm/lib/x86/svm.c +++ b/tools/testing/selftests/kvm/lib/x86/svm.c @@ -46,6 +46,9 @@ vcpu_alloc_svm(struct kvm_vm *vm, vm_vaddr_t *p_svm_gva) svm->msr_gpa = addr_gva2gpa(vm, (uintptr_t)svm->msr); memset(svm->msr_hva, 0, getpagesize()); + if (vm->stage2_mmu.pgd_created) + svm->ncr3_gpa = vm->stage2_mmu.pgd; + *p_svm_gva = svm_gva; return svm; } @@ -59,6 +62,25 @@ static void vmcb_set_seg(struct vmcb_seg *seg, u16 selector, seg->base = base; } +void vm_enable_npt(struct kvm_vm *vm) +{ + struct pte_masks pte_masks; + + TEST_ASSERT(kvm_cpu_has_npt(), "KVM doesn't supported nested NPT"); + + /* + * NPTs use the same 
PTE format, but deliberately drop the C-bit as the + * per-VM shared vs. private information is only meant for stage-1. + */ + pte_masks = vm->mmu.arch.pte_masks; + pte_masks.c = 0; + + /* NPT walks are treated as user accesses, so set the 'user' bit. */ + pte_masks.always_set = pte_masks.user; + + tdp_mmu_init(vm, vm->mmu.pgtable_levels, &pte_masks); +} + void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_rsp) { struct vmcb *vmcb = svm->vmcb; @@ -102,6 +124,11 @@ void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_r vmcb->save.rip = (u64)guest_rip; vmcb->save.rsp = (u64)guest_rsp; guest_regs.rdi = (u64)svm; + + if (svm->ncr3_gpa) { + ctrl->nested_ctl |= SVM_NESTED_CTL_NP_ENABLE; + ctrl->nested_cr3 = svm->ncr3_gpa; + } } /* diff --git a/tools/testing/selftests/kvm/lib/x86/vmx.c b/tools/testing/selftests/kvm/lib/x86/vmx.c index 29b082a58daa..c87b340362a9 100644 --- a/tools/testing/selftests/kvm/lib/x86/vmx.c +++ b/tools/testing/selftests/kvm/lib/x86/vmx.c @@ -10,38 +10,21 @@ #include "processor.h" #include "vmx.h" -#define PAGE_SHIFT_4K 12 - #define KVM_EPT_PAGE_TABLE_MIN_PADDR 0x1c0000 +#define EPTP_MT_SHIFT 0 /* EPTP memtype bits 2:0 */ +#define EPTP_PWL_SHIFT 3 /* EPTP page walk length bits 5:3 */ +#define EPTP_AD_ENABLED_SHIFT 6 /* EPTP AD enabled bit 6 */ + +#define EPTP_WB (X86_MEMTYPE_WB << EPTP_MT_SHIFT) +#define EPTP_PWL_4 (3ULL << EPTP_PWL_SHIFT) /* PWL is (levels - 1) */ +#define EPTP_AD_ENABLED (1ULL << EPTP_AD_ENABLED_SHIFT) + bool enable_evmcs; struct hv_enlightened_vmcs *current_evmcs; struct hv_vp_assist_page *current_vp_assist; -struct eptPageTableEntry { - uint64_t readable:1; - uint64_t writable:1; - uint64_t executable:1; - uint64_t memory_type:3; - uint64_t ignore_pat:1; - uint64_t page_size:1; - uint64_t accessed:1; - uint64_t dirty:1; - uint64_t ignored_11_10:2; - uint64_t address:40; - uint64_t ignored_62_52:11; - uint64_t suppress_ve:1; -}; - -struct eptPageTablePointer { - uint64_t 
memory_type:3; - uint64_t page_walk_length:3; - uint64_t ad_enabled:1; - uint64_t reserved_11_07:5; - uint64_t address:40; - uint64_t reserved_63_52:12; -}; int vcpu_enable_evmcs(struct kvm_vcpu *vcpu) { uint16_t evmcs_ver; @@ -58,6 +41,32 @@ int vcpu_enable_evmcs(struct kvm_vcpu *vcpu) return evmcs_ver; } +void vm_enable_ept(struct kvm_vm *vm) +{ + struct pte_masks pte_masks; + + TEST_ASSERT(kvm_cpu_has_ept(), "KVM doesn't support nested EPT"); + + /* + * EPTs do not have 'present' or 'user' bits, instead bit 0 is the + * 'readable' bit. + */ + pte_masks = (struct pte_masks) { + .present = 0, + .user = 0, + .readable = BIT_ULL(0), + .writable = BIT_ULL(1), + .executable = BIT_ULL(2), + .huge = BIT_ULL(7), + .accessed = BIT_ULL(8), + .dirty = BIT_ULL(9), + .nx = 0, + }; + + /* TODO: Add support for 5-level EPT. */ + tdp_mmu_init(vm, 4, &pte_masks); +} + /* Allocate memory regions for nested VMX tests. * * Input Args: @@ -107,6 +116,9 @@ vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva) vmx->vmwrite_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmwrite); memset(vmx->vmwrite_hva, 0, getpagesize()); + if (vm->stage2_mmu.pgd_created) + vmx->eptp_gpa = vm->stage2_mmu.pgd; + *p_vmx_gva = vmx_gva; return vmx; } @@ -196,16 +208,15 @@ static inline void init_vmcs_control_fields(struct vmx_pages *vmx) vmwrite(PIN_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_TRUE_PINBASED_CTLS)); if (vmx->eptp_gpa) { - uint64_t ept_paddr; - struct eptPageTablePointer eptp = { - .memory_type = X86_MEMTYPE_WB, - .page_walk_length = 3, /* + 1 */ - .ad_enabled = ept_vpid_cap_supported(VMX_EPT_VPID_CAP_AD_BITS), - .address = vmx->eptp_gpa >> PAGE_SHIFT_4K, - }; - - memcpy(&ept_paddr, &eptp, sizeof(ept_paddr)); - vmwrite(EPT_POINTER, ept_paddr); + uint64_t eptp = vmx->eptp_gpa | EPTP_WB | EPTP_PWL_4; + + TEST_ASSERT((vmx->eptp_gpa & ~PHYSICAL_PAGE_MASK) == 0, + "Illegal bits set in vmx->eptp_gpa"); + + if (ept_vpid_cap_supported(VMX_EPT_VPID_CAP_AD_BITS)) + eptp |= EPTP_AD_ENABLED; + + 
vmwrite(EPT_POINTER, eptp); sec_exec_ctl |= SECONDARY_EXEC_ENABLE_EPT; } @@ -362,170 +373,13 @@ void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp) init_vmcs_guest_state(guest_rip, guest_rsp); } -static void nested_create_pte(struct kvm_vm *vm, - struct eptPageTableEntry *pte, - uint64_t nested_paddr, - uint64_t paddr, - int current_level, - int target_level) -{ - if (!pte->readable) { - pte->writable = true; - pte->readable = true; - pte->executable = true; - pte->page_size = (current_level == target_level); - if (pte->page_size) - pte->address = paddr >> vm->page_shift; - else - pte->address = vm_alloc_page_table(vm) >> vm->page_shift; - } else { - /* - * Entry already present. Assert that the caller doesn't want - * a hugepage at this level, and that there isn't a hugepage at - * this level. - */ - TEST_ASSERT(current_level != target_level, - "Cannot create hugepage at level: %u, nested_paddr: 0x%lx", - current_level, nested_paddr); - TEST_ASSERT(!pte->page_size, - "Cannot create page table at level: %u, nested_paddr: 0x%lx", - current_level, nested_paddr); - } -} - - -void __nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm, - uint64_t nested_paddr, uint64_t paddr, int target_level) -{ - const uint64_t page_size = PG_LEVEL_SIZE(target_level); - struct eptPageTableEntry *pt = vmx->eptp_hva, *pte; - uint16_t index; - - TEST_ASSERT(vm->mode == VM_MODE_PXXVYY_4K, - "Unknown or unsupported guest mode: 0x%x", vm->mode); - - TEST_ASSERT((nested_paddr >> 48) == 0, - "Nested physical address 0x%lx is > 48-bits and requires 5-level EPT", - nested_paddr); - TEST_ASSERT((nested_paddr % page_size) == 0, - "Nested physical address not on page boundary,\n" - " nested_paddr: 0x%lx page_size: 0x%lx", - nested_paddr, page_size); - TEST_ASSERT((nested_paddr >> vm->page_shift) <= vm->max_gfn, - "Physical address beyond beyond maximum supported,\n" - " nested_paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", - paddr, vm->max_gfn, vm->page_size); - 
TEST_ASSERT((paddr % page_size) == 0, - "Physical address not on page boundary,\n" - " paddr: 0x%lx page_size: 0x%lx", - paddr, page_size); - TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn, - "Physical address beyond beyond maximum supported,\n" - " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", - paddr, vm->max_gfn, vm->page_size); - - for (int level = PG_LEVEL_512G; level >= PG_LEVEL_4K; level--) { - index = (nested_paddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu; - pte = &pt[index]; - - nested_create_pte(vm, pte, nested_paddr, paddr, level, target_level); - - if (pte->page_size) - break; - - pt = addr_gpa2hva(vm, pte->address * vm->page_size); - } - - /* - * For now mark these as accessed and dirty because the only - * testcase we have needs that. Can be reconsidered later. - */ - pte->accessed = true; - pte->dirty = true; - -} - -void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm, - uint64_t nested_paddr, uint64_t paddr) -{ - __nested_pg_map(vmx, vm, nested_paddr, paddr, PG_LEVEL_4K); -} - -/* - * Map a range of EPT guest physical addresses to the VM's physical address - * - * Input Args: - * vm - Virtual Machine - * nested_paddr - Nested guest physical address to map - * paddr - VM Physical Address - * size - The size of the range to map - * level - The level at which to map the range - * - * Output Args: None - * - * Return: None - * - * Within the VM given by vm, creates a nested guest translation for the - * page range starting at nested_paddr to the page range starting at paddr. 
- */ -void __nested_map(struct vmx_pages *vmx, struct kvm_vm *vm, - uint64_t nested_paddr, uint64_t paddr, uint64_t size, - int level) -{ - size_t page_size = PG_LEVEL_SIZE(level); - size_t npages = size / page_size; - - TEST_ASSERT(nested_paddr + size > nested_paddr, "Vaddr overflow"); - TEST_ASSERT(paddr + size > paddr, "Paddr overflow"); - - while (npages--) { - __nested_pg_map(vmx, vm, nested_paddr, paddr, level); - nested_paddr += page_size; - paddr += page_size; - } -} - -void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm, - uint64_t nested_paddr, uint64_t paddr, uint64_t size) -{ - __nested_map(vmx, vm, nested_paddr, paddr, size, PG_LEVEL_4K); -} - -/* Prepare an identity extended page table that maps all the - * physical pages in VM. - */ -void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm, - uint32_t memslot) -{ - sparsebit_idx_t i, last; - struct userspace_mem_region *region = - memslot2region(vm, memslot); - - i = (region->region.guest_phys_addr >> vm->page_shift) - 1; - last = i + (region->region.memory_size >> vm->page_shift); - for (;;) { - i = sparsebit_next_clear(region->unused_phy_pages, i); - if (i > last) - break; - - nested_map(vmx, vm, - (uint64_t)i << vm->page_shift, - (uint64_t)i << vm->page_shift, - 1 << vm->page_shift); - } -} - -/* Identity map a region with 1GiB Pages. 
*/ -void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm, - uint64_t addr, uint64_t size) -{ - __nested_map(vmx, vm, addr, addr, size, PG_LEVEL_1G); -} - bool kvm_cpu_has_ept(void) { uint64_t ctrl; + if (!kvm_cpu_has(X86_FEATURE_VMX)) + return false; + ctrl = kvm_get_feature_msr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS) >> 32; if (!(ctrl & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) return false; @@ -534,15 +388,6 @@ bool kvm_cpu_has_ept(void) return ctrl & SECONDARY_EXEC_ENABLE_EPT; } -void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm) -{ - TEST_ASSERT(kvm_cpu_has_ept(), "KVM doesn't support nested EPT"); - - vmx->eptp = (void *)vm_vaddr_alloc_page(vm); - vmx->eptp_hva = addr_gva2hva(vm, (uintptr_t)vmx->eptp); - vmx->eptp_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->eptp); -} - void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm) { vmx->apic_access = (void *)vm_vaddr_alloc_page(vm); diff --git a/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c b/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c index a3b7ce155981..c542cc4762b1 100644 --- a/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c +++ b/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c @@ -619,7 +619,7 @@ int main(int argc, char *argv[]) */ gva = vm_vaddr_unused_gap(vm, NTEST_PAGES * PAGE_SIZE, KVM_UTIL_MIN_VADDR); for (i = 0; i < NTEST_PAGES; i++) { - pte = vm_get_page_table_entry(vm, data->test_pages + i * PAGE_SIZE); + pte = vm_get_pte(vm, data->test_pages + i * PAGE_SIZE); gpa = addr_hva2gpa(vm, pte); virt_pg_map(vm, gva + PAGE_SIZE * i, gpa & PAGE_MASK); data->test_pages_pte[i] = gva + (gpa & ~PAGE_MASK); diff --git a/tools/testing/selftests/kvm/x86/nested_dirty_log_test.c b/tools/testing/selftests/kvm/x86/nested_dirty_log_test.c new file mode 100644 index 000000000000..619229bbd693 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/nested_dirty_log_test.c @@ -0,0 +1,293 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * KVM dirty page logging test + * + * 
Copyright (C) 2018, Red Hat, Inc. + */ +#include <stdio.h> +#include <stdlib.h> +#include <linux/bitmap.h> +#include <linux/bitops.h> + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "svm_util.h" +#include "vmx.h" + +/* The memory slot index to track dirty pages */ +#define TEST_MEM_SLOT_INDEX 1 + +/* + * Allocate four pages total. Two pages are used to verify that KVM marks + * the accessed page/GFN as dirty, but not the "other" page. Times two + * so that each "normal" page can be accessed from L2 via an aliased L2 GVA+GPA + * (when TDP is enabled), to verify KVM marks _L1's_ page/GFN as dirty (to + * detect failures, L2 => L1 GPAs can't be identity mapped in the TDP page + * tables, as marking L2's GPA dirty would get a false pass if L1 == L2). + */ +#define TEST_MEM_PAGES 4 + +#define TEST_MEM_BASE 0xc0000000 +#define TEST_MEM_ALIAS_BASE 0xc0002000 + +#define TEST_GUEST_ADDR(base, idx) ((base) + (idx) * PAGE_SIZE) + +#define TEST_GVA(idx) TEST_GUEST_ADDR(TEST_MEM_BASE, idx) +#define TEST_GPA(idx) TEST_GUEST_ADDR(TEST_MEM_BASE, idx) + +#define TEST_ALIAS_GPA(idx) TEST_GUEST_ADDR(TEST_MEM_ALIAS_BASE, idx) + +#define TEST_HVA(vm, idx) addr_gpa2hva(vm, TEST_GPA(idx)) + +#define L2_GUEST_STACK_SIZE 64 + +/* Use the page offset bits to communicate the access+fault type. 
*/ +#define TEST_SYNC_READ_FAULT BIT(0) +#define TEST_SYNC_WRITE_FAULT BIT(1) +#define TEST_SYNC_NO_FAULT BIT(2) + +static void l2_guest_code(vm_vaddr_t base) +{ + vm_vaddr_t page0 = TEST_GUEST_ADDR(base, 0); + vm_vaddr_t page1 = TEST_GUEST_ADDR(base, 1); + + READ_ONCE(*(u64 *)page0); + GUEST_SYNC(page0 | TEST_SYNC_READ_FAULT); + WRITE_ONCE(*(u64 *)page0, 1); + GUEST_SYNC(page0 | TEST_SYNC_WRITE_FAULT); + READ_ONCE(*(u64 *)page0); + GUEST_SYNC(page0 | TEST_SYNC_NO_FAULT); + + WRITE_ONCE(*(u64 *)page1, 1); + GUEST_SYNC(page1 | TEST_SYNC_WRITE_FAULT); + WRITE_ONCE(*(u64 *)page1, 1); + GUEST_SYNC(page1 | TEST_SYNC_WRITE_FAULT); + READ_ONCE(*(u64 *)page1); + GUEST_SYNC(page1 | TEST_SYNC_NO_FAULT); + + /* Exit to L1 and never come back. */ + vmcall(); +} + +static void l2_guest_code_tdp_enabled(void) +{ + /* + * Use the aliased virtual addresses when running with TDP to verify + * that KVM correctly handles the case where a page is dirtied via a + * different GPA than would be used by L1. + */ + l2_guest_code(TEST_MEM_ALIAS_BASE); +} + +static void l2_guest_code_tdp_disabled(void) +{ + /* + * Use the "normal" virtual addresses when running without TDP enabled, + * in which case L2 will use the same page tables as L1, and thus needs + * to use the same virtual addresses that are mapped into L1. 
+ */ + l2_guest_code(TEST_MEM_BASE); +} + +void l1_vmx_code(struct vmx_pages *vmx) +{ + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + void *l2_rip; + + GUEST_ASSERT(vmx->vmcs_gpa); + GUEST_ASSERT(prepare_for_vmx_operation(vmx)); + GUEST_ASSERT(load_vmcs(vmx)); + + if (vmx->eptp_gpa) + l2_rip = l2_guest_code_tdp_enabled; + else + l2_rip = l2_guest_code_tdp_disabled; + + prepare_vmcs(vmx, l2_rip, &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + GUEST_SYNC(TEST_SYNC_NO_FAULT); + GUEST_ASSERT(!vmlaunch()); + GUEST_SYNC(TEST_SYNC_NO_FAULT); + GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_VMCALL); + GUEST_DONE(); +} + +static void l1_svm_code(struct svm_test_data *svm) +{ + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + void *l2_rip; + + if (svm->ncr3_gpa) + l2_rip = l2_guest_code_tdp_enabled; + else + l2_rip = l2_guest_code_tdp_disabled; + + generic_svm_setup(svm, l2_rip, &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + GUEST_SYNC(TEST_SYNC_NO_FAULT); + run_guest(svm->vmcb, svm->vmcb_gpa); + GUEST_SYNC(TEST_SYNC_NO_FAULT); + GUEST_ASSERT_EQ(svm->vmcb->control.exit_code, SVM_EXIT_VMMCALL); + GUEST_DONE(); +} + +static void l1_guest_code(void *data) +{ + if (this_cpu_has(X86_FEATURE_VMX)) + l1_vmx_code(data); + else + l1_svm_code(data); +} + +static void test_handle_ucall_sync(struct kvm_vm *vm, u64 arg, + unsigned long *bmap) +{ + vm_vaddr_t gva = arg & ~(PAGE_SIZE - 1); + int page_nr, i; + + /* + * Extract the page number of underlying physical page, which is also + * the _L1_ page number. The dirty bitmap _must_ be updated based on + * the L1 GPA, not L2 GPA, i.e. whether or not L2 used an aliased GPA + * (i.e. if TDP enabled for L2) is irrelevant with respect to the dirty + * bitmap and which underlying physical page is accessed. + * + * Note, gva will be '0' if there was no access, i.e. if the purpose of + * the sync is to verify all pages are clean. 
+ */ + if (!gva) + page_nr = 0; + else if (gva >= TEST_MEM_ALIAS_BASE) + page_nr = (gva - TEST_MEM_ALIAS_BASE) >> PAGE_SHIFT; + else + page_nr = (gva - TEST_MEM_BASE) >> PAGE_SHIFT; + TEST_ASSERT(page_nr == 0 || page_nr == 1, + "Test bug, unexpected frame number '%u' for arg = %lx", page_nr, arg); + TEST_ASSERT(gva || (arg & TEST_SYNC_NO_FAULT), + "Test bug, gva must be valid if a fault is expected"); + + kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap); + + /* + * Check all pages to verify the correct physical page was modified (or + * not), and that all pages are clean/dirty as expected. + * + * If a fault of any kind is expected, the target page should be dirty + * as the Dirty bit is set in the gPTE. KVM should create a writable + * SPTE even on a read fault, *and* KVM must mark the GFN as dirty + * when doing so. + */ + for (i = 0; i < TEST_MEM_PAGES; i++) { + if (i == page_nr && (arg & TEST_SYNC_WRITE_FAULT)) + TEST_ASSERT(*(u64 *)TEST_HVA(vm, i) == 1, + "Page %u incorrectly not written by guest", i); + else + TEST_ASSERT(*(u64 *)TEST_HVA(vm, i) == 0xaaaaaaaaaaaaaaaaULL, + "Page %u incorrectly written by guest", i); + + if (i == page_nr && !(arg & TEST_SYNC_NO_FAULT)) + TEST_ASSERT(test_bit(i, bmap), + "Page %u incorrectly reported clean on %s fault", + i, arg & TEST_SYNC_READ_FAULT ? "read" : "write"); + else + TEST_ASSERT(!test_bit(i, bmap), + "Page %u incorrectly reported dirty", i); + } +} + +static void test_dirty_log(bool nested_tdp) +{ + vm_vaddr_t nested_gva = 0; + unsigned long *bmap; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + struct ucall uc; + bool done = false; + + pr_info("Nested TDP: %s\n", nested_tdp ? 
"enabled" : "disabled"); + + /* Create VM */ + vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); + if (nested_tdp) + vm_enable_tdp(vm); + + if (kvm_cpu_has(X86_FEATURE_VMX)) + vcpu_alloc_vmx(vm, &nested_gva); + else + vcpu_alloc_svm(vm, &nested_gva); + + vcpu_args_set(vcpu, 1, nested_gva); + + /* Add an extra memory slot for testing dirty logging */ + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, + TEST_MEM_BASE, + TEST_MEM_SLOT_INDEX, + TEST_MEM_PAGES, + KVM_MEM_LOG_DIRTY_PAGES); + + /* + * Add an identity map for GVA range [0xc0000000, 0xc0004000). This + * affects both L1 and L2. However... + */ + virt_map(vm, TEST_MEM_BASE, TEST_MEM_BASE, TEST_MEM_PAGES); + + /* + * ... pages in the L2 GPA address range [0xc0002000, 0xc0004000) will + * map to [0xc0000000, 0xc0002000) when TDP is enabled (for L2). + * + * When TDP is disabled, the L2 guest code will still access the same L1 + * GPAs as the TDP enabled case. + * + * Set the Dirty bit in the PTEs used by L2 so that KVM will create + * writable SPTEs when handling read faults (if the Dirty bit isn't + * set, KVM must intercept the next write to emulate the Dirty bit + * update). 
+ */ + if (nested_tdp) { + tdp_identity_map_default_memslots(vm); + tdp_map(vm, TEST_ALIAS_GPA(0), TEST_GPA(0), PAGE_SIZE); + tdp_map(vm, TEST_ALIAS_GPA(1), TEST_GPA(1), PAGE_SIZE); + + *tdp_get_pte(vm, TEST_ALIAS_GPA(0)) |= PTE_DIRTY_MASK(&vm->stage2_mmu); + *tdp_get_pte(vm, TEST_ALIAS_GPA(1)) |= PTE_DIRTY_MASK(&vm->stage2_mmu); + } else { + *vm_get_pte(vm, TEST_GVA(0)) |= PTE_DIRTY_MASK(&vm->mmu); + *vm_get_pte(vm, TEST_GVA(1)) |= PTE_DIRTY_MASK(&vm->mmu); + } + + bmap = bitmap_zalloc(TEST_MEM_PAGES); + + while (!done) { + memset(TEST_HVA(vm, 0), 0xaa, TEST_MEM_PAGES * PAGE_SIZE); + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + /* NOT REACHED */ + case UCALL_SYNC: + test_handle_ucall_sync(vm, uc.args[1], bmap); + break; + case UCALL_DONE: + done = true; + break; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + } +} + +int main(int argc, char *argv[]) +{ + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX) || kvm_cpu_has(X86_FEATURE_SVM)); + + test_dirty_log(/*nested_tdp=*/false); + + if (kvm_cpu_has_tdp()) + test_dirty_log(/*nested_tdp=*/true); + + return 0; +} diff --git a/tools/testing/selftests/kvm/x86/nested_vmsave_vmload_test.c b/tools/testing/selftests/kvm/x86/nested_vmsave_vmload_test.c new file mode 100644 index 000000000000..6764a48f9d4d --- /dev/null +++ b/tools/testing/selftests/kvm/x86/nested_vmsave_vmload_test.c @@ -0,0 +1,197 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2026, Google LLC. + */ +#include "kvm_util.h" +#include "vmx.h" +#include "svm_util.h" +#include "kselftest.h" + +/* + * Allocate two VMCB pages for testing. Both pages have different GVAs (shared + * by both L1 and L2) and L1 GPAs. A single L2 GPA is used such that: + * - L2 GPA == L1 GPA for VMCB0. + * - L2 GPA is mapped to L1 GPA for VMCB1 using NPT in L1. 
+ * + * This allows testing whether the GPA used by VMSAVE/VMLOAD in L2 is + * interpreted as a direct L1 GPA or translated using NPT as an L2 GPA, depending + * on which VMCB is accessed. + */ +#define TEST_MEM_SLOT_INDEX 1 +#define TEST_MEM_PAGES 2 +#define TEST_MEM_BASE 0xc0000000 + +#define TEST_GUEST_ADDR(idx) (TEST_MEM_BASE + (idx) * PAGE_SIZE) + +#define TEST_VMCB_L1_GPA(idx) TEST_GUEST_ADDR(idx) +#define TEST_VMCB_GVA(idx) TEST_GUEST_ADDR(idx) + +#define TEST_VMCB_L2_GPA TEST_VMCB_L1_GPA(0) + +#define L2_GUEST_STACK_SIZE 64 + +static void l2_guest_code_vmsave(void) +{ + asm volatile("vmsave %0" : : "a"(TEST_VMCB_L2_GPA) : "memory"); +} + +static void l2_guest_code_vmload(void) +{ + asm volatile("vmload %0" : : "a"(TEST_VMCB_L2_GPA) : "memory"); +} + +static void l2_guest_code_vmcb(int vmcb_idx) +{ + wrmsr(MSR_KERNEL_GS_BASE, 0xaaaa); + l2_guest_code_vmsave(); + + /* Verify the VMCB used by VMSAVE and update KERNEL_GS_BASE to 0xbbbb */ + GUEST_SYNC(vmcb_idx); + + l2_guest_code_vmload(); + GUEST_ASSERT_EQ(rdmsr(MSR_KERNEL_GS_BASE), 0xbbbb); + + /* Reset MSR_KERNEL_GS_BASE */ + wrmsr(MSR_KERNEL_GS_BASE, 0); + l2_guest_code_vmsave(); + + vmmcall(); +} + +static void l2_guest_code_vmcb0(void) +{ + l2_guest_code_vmcb(0); +} + +static void l2_guest_code_vmcb1(void) +{ + l2_guest_code_vmcb(1); +} + +static void l1_guest_code(struct svm_test_data *svm) +{ + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + + /* Each test case initializes the guest RIP below */ + generic_svm_setup(svm, NULL, &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + /* Set VMSAVE/VMLOAD intercepts and make sure they work with.. */ + svm->vmcb->control.intercept |= (BIT_ULL(INTERCEPT_VMSAVE) | + BIT_ULL(INTERCEPT_VMLOAD)); + + /* ..VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK cleared.. 
*/ + svm->vmcb->control.virt_ext &= ~VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK; + + svm->vmcb->save.rip = (u64)l2_guest_code_vmsave; + run_guest(svm->vmcb, svm->vmcb_gpa); + GUEST_ASSERT_EQ(svm->vmcb->control.exit_code, SVM_EXIT_VMSAVE); + + svm->vmcb->save.rip = (u64)l2_guest_code_vmload; + run_guest(svm->vmcb, svm->vmcb_gpa); + GUEST_ASSERT_EQ(svm->vmcb->control.exit_code, SVM_EXIT_VMLOAD); + + /* ..and VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK set */ + svm->vmcb->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK; + + svm->vmcb->save.rip = (u64)l2_guest_code_vmsave; + run_guest(svm->vmcb, svm->vmcb_gpa); + GUEST_ASSERT_EQ(svm->vmcb->control.exit_code, SVM_EXIT_VMSAVE); + + svm->vmcb->save.rip = (u64)l2_guest_code_vmload; + run_guest(svm->vmcb, svm->vmcb_gpa); + GUEST_ASSERT_EQ(svm->vmcb->control.exit_code, SVM_EXIT_VMLOAD); + + /* Now clear the intercepts to test VMSAVE/VMLOAD behavior */ + svm->vmcb->control.intercept &= ~(BIT_ULL(INTERCEPT_VMSAVE) | + BIT_ULL(INTERCEPT_VMLOAD)); + + /* + * Without VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK, the GPA will be + * interpreted as an L1 GPA, so VMCB0 should be used. + */ + svm->vmcb->save.rip = (u64)l2_guest_code_vmcb0; + svm->vmcb->control.virt_ext &= ~VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK; + run_guest(svm->vmcb, svm->vmcb_gpa); + GUEST_ASSERT_EQ(svm->vmcb->control.exit_code, SVM_EXIT_VMMCALL); + + /* + * With VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK, the GPA will be interpreted as + * an L2 GPA, and translated through the NPT to VMCB1. 
+ */ + svm->vmcb->save.rip = (u64)l2_guest_code_vmcb1; + svm->vmcb->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK; + run_guest(svm->vmcb, svm->vmcb_gpa); + GUEST_ASSERT_EQ(svm->vmcb->control.exit_code, SVM_EXIT_VMMCALL); + + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + vm_vaddr_t nested_gva = 0; + struct vmcb *test_vmcb[2]; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + int i; + + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM)); + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_NPT)); + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_V_VMSAVE_VMLOAD)); + + vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); + vm_enable_tdp(vm); + + vcpu_alloc_svm(vm, &nested_gva); + vcpu_args_set(vcpu, 1, nested_gva); + + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, + TEST_MEM_BASE, TEST_MEM_SLOT_INDEX, + TEST_MEM_PAGES, 0); + + for (i = 0; i <= 1; i++) { + virt_map(vm, TEST_VMCB_GVA(i), TEST_VMCB_L1_GPA(i), 1); + test_vmcb[i] = (struct vmcb *)addr_gva2hva(vm, TEST_VMCB_GVA(i)); + } + + tdp_identity_map_default_memslots(vm); + + /* + * L2 GPA == L1_GPA(0), but map it to L1_GPA(1), to allow testing + * whether the L2 GPA is interpreted as an L1 GPA or translated through + * the NPT. + */ + TEST_ASSERT_EQ(TEST_VMCB_L2_GPA, TEST_VMCB_L1_GPA(0)); + tdp_map(vm, TEST_VMCB_L2_GPA, TEST_VMCB_L1_GPA(1), PAGE_SIZE); + + for (;;) { + struct ucall uc; + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + case UCALL_SYNC: + i = uc.args[1]; + TEST_ASSERT(i == 0 || i == 1, "Unexpected VMCB idx: %d", i); + + /* + * Check that only the expected VMCB has KERNEL_GS_BASE + * set to 0xaaaa, and update it to 0xbbbb. 
+ */ + TEST_ASSERT_EQ(test_vmcb[i]->save.kernel_gs_base, 0xaaaa); + TEST_ASSERT_EQ(test_vmcb[1-i]->save.kernel_gs_base, 0); + test_vmcb[i]->save.kernel_gs_base = 0xbbbb; + break; + case UCALL_DONE: + goto done; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + } + +done: + kvm_vm_free(vm); + return 0; +} diff --git a/tools/testing/selftests/kvm/x86/smaller_maxphyaddr_emulation_test.c b/tools/testing/selftests/kvm/x86/smaller_maxphyaddr_emulation_test.c index fabeeaddfb3a..0e8aec568010 100644 --- a/tools/testing/selftests/kvm/x86/smaller_maxphyaddr_emulation_test.c +++ b/tools/testing/selftests/kvm/x86/smaller_maxphyaddr_emulation_test.c @@ -47,7 +47,6 @@ int main(int argc, char *argv[]) struct kvm_vcpu *vcpu; struct kvm_vm *vm; struct ucall uc; - uint64_t *pte; uint64_t *hva; uint64_t gpa; int rc; @@ -73,8 +72,7 @@ int main(int argc, char *argv[]) hva = addr_gpa2hva(vm, MEM_REGION_GPA); memset(hva, 0, PAGE_SIZE); - pte = vm_get_page_table_entry(vm, MEM_REGION_GVA); - *pte |= BIT_ULL(MAXPHYADDR); + *vm_get_pte(vm, MEM_REGION_GVA) |= BIT_ULL(MAXPHYADDR); vcpu_run(vcpu); diff --git a/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c deleted file mode 100644 index 98cb6bdab3e6..000000000000 --- a/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c +++ /dev/null @@ -1,179 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * KVM dirty page logging test - * - * Copyright (C) 2018, Red Hat, Inc. 
- */ -#include <stdio.h> -#include <stdlib.h> -#include <linux/bitmap.h> -#include <linux/bitops.h> - -#include "test_util.h" -#include "kvm_util.h" -#include "processor.h" -#include "vmx.h" - -/* The memory slot index to track dirty pages */ -#define TEST_MEM_SLOT_INDEX 1 -#define TEST_MEM_PAGES 3 - -/* L1 guest test virtual memory offset */ -#define GUEST_TEST_MEM 0xc0000000 - -/* L2 guest test virtual memory offset */ -#define NESTED_TEST_MEM1 0xc0001000 -#define NESTED_TEST_MEM2 0xc0002000 - -static void l2_guest_code(u64 *a, u64 *b) -{ - READ_ONCE(*a); - WRITE_ONCE(*a, 1); - GUEST_SYNC(true); - GUEST_SYNC(false); - - WRITE_ONCE(*b, 1); - GUEST_SYNC(true); - WRITE_ONCE(*b, 1); - GUEST_SYNC(true); - GUEST_SYNC(false); - - /* Exit to L1 and never come back. */ - vmcall(); -} - -static void l2_guest_code_ept_enabled(void) -{ - l2_guest_code((u64 *)NESTED_TEST_MEM1, (u64 *)NESTED_TEST_MEM2); -} - -static void l2_guest_code_ept_disabled(void) -{ - /* Access the same L1 GPAs as l2_guest_code_ept_enabled() */ - l2_guest_code((u64 *)GUEST_TEST_MEM, (u64 *)GUEST_TEST_MEM); -} - -void l1_guest_code(struct vmx_pages *vmx) -{ -#define L2_GUEST_STACK_SIZE 64 - unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; - void *l2_rip; - - GUEST_ASSERT(vmx->vmcs_gpa); - GUEST_ASSERT(prepare_for_vmx_operation(vmx)); - GUEST_ASSERT(load_vmcs(vmx)); - - if (vmx->eptp_gpa) - l2_rip = l2_guest_code_ept_enabled; - else - l2_rip = l2_guest_code_ept_disabled; - - prepare_vmcs(vmx, l2_rip, &l2_guest_stack[L2_GUEST_STACK_SIZE]); - - GUEST_SYNC(false); - GUEST_ASSERT(!vmlaunch()); - GUEST_SYNC(false); - GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); - GUEST_DONE(); -} - -static void test_vmx_dirty_log(bool enable_ept) -{ - vm_vaddr_t vmx_pages_gva = 0; - struct vmx_pages *vmx; - unsigned long *bmap; - uint64_t *host_test_mem; - - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - struct ucall uc; - bool done = false; - - pr_info("Nested EPT: %s\n", enable_ept ? 
"enabled" : "disabled"); - - /* Create VM */ - vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); - vmx = vcpu_alloc_vmx(vm, &vmx_pages_gva); - vcpu_args_set(vcpu, 1, vmx_pages_gva); - - /* Add an extra memory slot for testing dirty logging */ - vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, - GUEST_TEST_MEM, - TEST_MEM_SLOT_INDEX, - TEST_MEM_PAGES, - KVM_MEM_LOG_DIRTY_PAGES); - - /* - * Add an identity map for GVA range [0xc0000000, 0xc0002000). This - * affects both L1 and L2. However... - */ - virt_map(vm, GUEST_TEST_MEM, GUEST_TEST_MEM, TEST_MEM_PAGES); - - /* - * ... pages in the L2 GPA range [0xc0001000, 0xc0003000) will map to - * 0xc0000000. - * - * Note that prepare_eptp should be called only L1's GPA map is done, - * meaning after the last call to virt_map. - * - * When EPT is disabled, the L2 guest code will still access the same L1 - * GPAs as the EPT enabled case. - */ - if (enable_ept) { - prepare_eptp(vmx, vm); - nested_map_memslot(vmx, vm, 0); - nested_map(vmx, vm, NESTED_TEST_MEM1, GUEST_TEST_MEM, PAGE_SIZE); - nested_map(vmx, vm, NESTED_TEST_MEM2, GUEST_TEST_MEM, PAGE_SIZE); - } - - bmap = bitmap_zalloc(TEST_MEM_PAGES); - host_test_mem = addr_gpa2hva(vm, GUEST_TEST_MEM); - - while (!done) { - memset(host_test_mem, 0xaa, TEST_MEM_PAGES * PAGE_SIZE); - vcpu_run(vcpu); - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); - - switch (get_ucall(vcpu, &uc)) { - case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - /* NOT REACHED */ - case UCALL_SYNC: - /* - * The nested guest wrote at offset 0x1000 in the memslot, but the - * dirty bitmap must be filled in according to L1 GPA, not L2. 
- */ - kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap); - if (uc.args[1]) { - TEST_ASSERT(test_bit(0, bmap), "Page 0 incorrectly reported clean"); - TEST_ASSERT(host_test_mem[0] == 1, "Page 0 not written by guest"); - } else { - TEST_ASSERT(!test_bit(0, bmap), "Page 0 incorrectly reported dirty"); - TEST_ASSERT(host_test_mem[0] == 0xaaaaaaaaaaaaaaaaULL, "Page 0 written by guest"); - } - - TEST_ASSERT(!test_bit(1, bmap), "Page 1 incorrectly reported dirty"); - TEST_ASSERT(host_test_mem[PAGE_SIZE / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 1 written by guest"); - TEST_ASSERT(!test_bit(2, bmap), "Page 2 incorrectly reported dirty"); - TEST_ASSERT(host_test_mem[PAGE_SIZE*2 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 2 written by guest"); - break; - case UCALL_DONE: - done = true; - break; - default: - TEST_FAIL("Unknown ucall %lu", uc.cmd); - } - } -} - -int main(int argc, char *argv[]) -{ - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); - - test_vmx_dirty_log(/*enable_ept=*/false); - - if (kvm_cpu_has_ept()) - test_vmx_dirty_log(/*enable_ept=*/true); - - return 0; -} diff --git a/tools/testing/selftests/kvm/x86/vmx_nested_la57_state_test.c b/tools/testing/selftests/kvm/x86/vmx_nested_la57_state_test.c index cf1d2d1f2a8f..915c42001dba 100644 --- a/tools/testing/selftests/kvm/x86/vmx_nested_la57_state_test.c +++ b/tools/testing/selftests/kvm/x86/vmx_nested_la57_state_test.c @@ -90,7 +90,7 @@ int main(int argc, char *argv[]) * L1 needs to read its own PML5 table to set up L2. Identity map * the PML5 table to facilitate this. 
*/ - virt_map(vm, vm->pgd, vm->pgd, 1); + virt_map(vm, vm->mmu.pgd, vm->mmu.pgd, 1); vcpu_alloc_vmx(vm, &vmx_pages_gva); vcpu_args_set(vcpu, 1, vmx_pages_gva); diff --git a/tools/testing/selftests/kvm/x86/xapic_tpr_test.c b/tools/testing/selftests/kvm/x86/xapic_tpr_test.c new file mode 100644 index 000000000000..3862134d9d40 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/xapic_tpr_test.c @@ -0,0 +1,276 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <fcntl.h> +#include <stdatomic.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> +#include <unistd.h> + +#include "apic.h" +#include "kvm_util.h" +#include "processor.h" +#include "test_util.h" + +static bool is_x2apic; + +#define IRQ_VECTOR 0x20 + +/* See also the comment at similar assertion in memslot_perf_test.c */ +static_assert(ATOMIC_INT_LOCK_FREE == 2, "atomic int is not lockless"); + +static atomic_uint tpr_guest_irq_sync_val; + +static void tpr_guest_irq_sync_flag_reset(void) +{ + atomic_store_explicit(&tpr_guest_irq_sync_val, 0, + memory_order_release); +} + +static unsigned int tpr_guest_irq_sync_val_get(void) +{ + return atomic_load_explicit(&tpr_guest_irq_sync_val, + memory_order_acquire); +} + +static void tpr_guest_irq_sync_val_inc(void) +{ + atomic_fetch_add_explicit(&tpr_guest_irq_sync_val, 1, + memory_order_acq_rel); +} + +static void tpr_guest_irq_handler_xapic(struct ex_regs *regs) +{ + tpr_guest_irq_sync_val_inc(); + + xapic_write_reg(APIC_EOI, 0); +} + +static void tpr_guest_irq_handler_x2apic(struct ex_regs *regs) +{ + tpr_guest_irq_sync_val_inc(); + + x2apic_write_reg(APIC_EOI, 0); +} + +static void tpr_guest_irq_queue(void) +{ + if (is_x2apic) { + x2apic_write_reg(APIC_SELF_IPI, IRQ_VECTOR); + } else { + uint32_t icr, icr2; + + icr = APIC_DEST_SELF | APIC_DEST_PHYSICAL | APIC_DM_FIXED | + IRQ_VECTOR; + icr2 = 0; + + xapic_write_reg(APIC_ICR2, icr2); + xapic_write_reg(APIC_ICR, icr); + } +} + +static uint8_t tpr_guest_tpr_get(void) +{ + uint32_t 
taskpri; + + if (is_x2apic) + taskpri = x2apic_read_reg(APIC_TASKPRI); + else + taskpri = xapic_read_reg(APIC_TASKPRI); + + return GET_APIC_PRI(taskpri); +} + +static uint8_t tpr_guest_ppr_get(void) +{ + uint32_t procpri; + + if (is_x2apic) + procpri = x2apic_read_reg(APIC_PROCPRI); + else + procpri = xapic_read_reg(APIC_PROCPRI); + + return GET_APIC_PRI(procpri); +} + +static uint8_t tpr_guest_cr8_get(void) +{ + uint64_t cr8; + + asm volatile ("mov %%cr8, %[cr8]\n\t" : [cr8] "=r"(cr8)); + + return cr8 & GENMASK(3, 0); +} + +static void tpr_guest_check_tpr_ppr_cr8_equal(void) +{ + uint8_t tpr; + + tpr = tpr_guest_tpr_get(); + + GUEST_ASSERT_EQ(tpr_guest_ppr_get(), tpr); + GUEST_ASSERT_EQ(tpr_guest_cr8_get(), tpr); +} + +static void tpr_guest_code(void) +{ + cli(); + + if (is_x2apic) + x2apic_enable(); + else + xapic_enable(); + + GUEST_ASSERT_EQ(tpr_guest_tpr_get(), 0); + tpr_guest_check_tpr_ppr_cr8_equal(); + + tpr_guest_irq_queue(); + + /* TPR = 0 but IRQ masked by IF=0, should not fire */ + udelay(1000); + GUEST_ASSERT_EQ(tpr_guest_irq_sync_val_get(), 0); + + sti(); + + /* IF=1 now, IRQ should fire */ + while (tpr_guest_irq_sync_val_get() == 0) + cpu_relax(); + GUEST_ASSERT_EQ(tpr_guest_irq_sync_val_get(), 1); + + GUEST_SYNC(true); + tpr_guest_check_tpr_ppr_cr8_equal(); + + tpr_guest_irq_queue(); + + /* IRQ masked by barely high enough TPR now, should not fire */ + udelay(1000); + GUEST_ASSERT_EQ(tpr_guest_irq_sync_val_get(), 1); + + GUEST_SYNC(false); + tpr_guest_check_tpr_ppr_cr8_equal(); + + /* TPR barely low enough now to unmask IRQ, should fire */ + while (tpr_guest_irq_sync_val_get() == 1) + cpu_relax(); + GUEST_ASSERT_EQ(tpr_guest_irq_sync_val_get(), 2); + + GUEST_DONE(); +} + +static uint8_t lapic_tpr_get(struct kvm_lapic_state *xapic) +{ + return GET_APIC_PRI(*((u32 *)&xapic->regs[APIC_TASKPRI])); +} + +static void lapic_tpr_set(struct kvm_lapic_state *xapic, uint8_t val) +{ + u32 *taskpri = (u32 *)&xapic->regs[APIC_TASKPRI]; + + *taskpri = 
SET_APIC_PRI(*taskpri, val); +} + +static uint8_t sregs_tpr(struct kvm_sregs *sregs) +{ + return sregs->cr8 & GENMASK(3, 0); +} + +static void test_tpr_check_tpr_zero(struct kvm_vcpu *vcpu) +{ + struct kvm_lapic_state xapic; + + vcpu_ioctl(vcpu, KVM_GET_LAPIC, &xapic); + + TEST_ASSERT_EQ(lapic_tpr_get(&xapic), 0); +} + +static void test_tpr_check_tpr_cr8_equal(struct kvm_vcpu *vcpu) +{ + struct kvm_sregs sregs; + struct kvm_lapic_state xapic; + + vcpu_sregs_get(vcpu, &sregs); + vcpu_ioctl(vcpu, KVM_GET_LAPIC, &xapic); + + TEST_ASSERT_EQ(sregs_tpr(&sregs), lapic_tpr_get(&xapic)); +} + +static void test_tpr_set_tpr_for_irq(struct kvm_vcpu *vcpu, bool mask) +{ + struct kvm_lapic_state xapic; + uint8_t tpr; + + static_assert(IRQ_VECTOR >= 16, "invalid IRQ vector number"); + tpr = IRQ_VECTOR / 16; + if (!mask) + tpr--; + + vcpu_ioctl(vcpu, KVM_GET_LAPIC, &xapic); + lapic_tpr_set(&xapic, tpr); + vcpu_ioctl(vcpu, KVM_SET_LAPIC, &xapic); +} + +static void test_tpr(bool __is_x2apic) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + bool done = false; + + is_x2apic = __is_x2apic; + + vm = vm_create_with_one_vcpu(&vcpu, tpr_guest_code); + if (is_x2apic) { + vm_install_exception_handler(vm, IRQ_VECTOR, + tpr_guest_irq_handler_x2apic); + } else { + vm_install_exception_handler(vm, IRQ_VECTOR, + tpr_guest_irq_handler_xapic); + vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_X2APIC); + virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA); + } + + sync_global_to_guest(vcpu->vm, is_x2apic); + + /* According to the SDM/APM the TPR value at reset is 0 */ + test_tpr_check_tpr_zero(vcpu); + test_tpr_check_tpr_cr8_equal(vcpu); + + tpr_guest_irq_sync_flag_reset(); + sync_global_to_guest(vcpu->vm, tpr_guest_irq_sync_val); + + while (!done) { + struct ucall uc; + + alarm(2); + vcpu_run(vcpu); + alarm(0); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + case UCALL_DONE: + test_tpr_check_tpr_cr8_equal(vcpu); + done = true; + break; + case UCALL_SYNC: + 
test_tpr_check_tpr_cr8_equal(vcpu); + test_tpr_set_tpr_for_irq(vcpu, uc.args[1]); + break; + default: + TEST_FAIL("Unknown ucall result 0x%lx", uc.cmd); + break; + } + } + kvm_vm_free(vm); +} + +int main(int argc, char *argv[]) +{ + /* + * Use separate VMs for the xAPIC and x2APIC tests so that x2APIC can + * be fully hidden from the guest. KVM disallows changing CPUID after + * KVM_RUN and AVIC is disabled if _any_ vCPU is allowed to use x2APIC. + */ + test_tpr(false); + test_tpr(true); +} |
