1756e12e7SXiaoyao Li /* 2756e12e7SXiaoyao Li * QEMU TDX support 3756e12e7SXiaoyao Li * 4756e12e7SXiaoyao Li * Copyright (c) 2025 Intel Corporation 5756e12e7SXiaoyao Li * 6756e12e7SXiaoyao Li * Author: 7756e12e7SXiaoyao Li * Xiaoyao Li <xiaoyao.li@intel.com> 8756e12e7SXiaoyao Li * 9756e12e7SXiaoyao Li * SPDX-License-Identifier: GPL-2.0-or-later 10756e12e7SXiaoyao Li */ 11756e12e7SXiaoyao Li 12756e12e7SXiaoyao Li #include "qemu/osdep.h" 138eddedc3SXiaoyao Li #include "qemu/error-report.h" 148eddedc3SXiaoyao Li #include "qapi/error.h" 15756e12e7SXiaoyao Li #include "qom/object_interfaces.h" 16756e12e7SXiaoyao Li 17631a2ac5SXiaoyao Li #include "hw/i386/x86.h" 18b455880eSXiaoyao Li #include "kvm_i386.h" 19756e12e7SXiaoyao Li #include "tdx.h" 20756e12e7SXiaoyao Li 216016e297SXiaoyao Li #define TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE BIT_ULL(28) 22*bb3be394SXiaoyao Li #define TDX_TD_ATTRIBUTES_PKS BIT_ULL(30) 23*bb3be394SXiaoyao Li #define TDX_TD_ATTRIBUTES_PERFMON BIT_ULL(63) 246016e297SXiaoyao Li 251619d0e4SXiaoyao Li static TdxGuest *tdx_guest; 261619d0e4SXiaoyao Li 278eddedc3SXiaoyao Li static struct kvm_tdx_capabilities *tdx_caps; 288eddedc3SXiaoyao Li 291619d0e4SXiaoyao Li /* Valid after kvm_arch_init()->confidential_guest_kvm_init()->tdx_kvm_init() */ 301619d0e4SXiaoyao Li bool is_tdx_vm(void) 311619d0e4SXiaoyao Li { 321619d0e4SXiaoyao Li return !!tdx_guest; 331619d0e4SXiaoyao Li } 341619d0e4SXiaoyao Li 358eddedc3SXiaoyao Li enum tdx_ioctl_level { 368eddedc3SXiaoyao Li TDX_VM_IOCTL, 378eddedc3SXiaoyao Li TDX_VCPU_IOCTL, 388eddedc3SXiaoyao Li }; 398eddedc3SXiaoyao Li 408eddedc3SXiaoyao Li static int tdx_ioctl_internal(enum tdx_ioctl_level level, void *state, 418eddedc3SXiaoyao Li int cmd_id, __u32 flags, void *data, 428eddedc3SXiaoyao Li Error **errp) 43631a2ac5SXiaoyao Li { 448eddedc3SXiaoyao Li struct kvm_tdx_cmd tdx_cmd = {}; 458eddedc3SXiaoyao Li int r; 468eddedc3SXiaoyao Li 478eddedc3SXiaoyao Li const char *tdx_ioctl_name[] = { 488eddedc3SXiaoyao Li [KVM_TDX_CAPABILITIES] = "KVM_TDX_CAPABILITIES", 498eddedc3SXiaoyao Li [KVM_TDX_INIT_VM] = "KVM_TDX_INIT_VM", 508eddedc3SXiaoyao Li [KVM_TDX_INIT_VCPU] = "KVM_TDX_INIT_VCPU", 518eddedc3SXiaoyao Li [KVM_TDX_INIT_MEM_REGION] = "KVM_TDX_INIT_MEM_REGION", 528eddedc3SXiaoyao Li [KVM_TDX_FINALIZE_VM] = "KVM_TDX_FINALIZE_VM", 538eddedc3SXiaoyao Li [KVM_TDX_GET_CPUID] = "KVM_TDX_GET_CPUID", 548eddedc3SXiaoyao Li }; 558eddedc3SXiaoyao Li 568eddedc3SXiaoyao Li tdx_cmd.id = cmd_id; 578eddedc3SXiaoyao Li tdx_cmd.flags = flags; 588eddedc3SXiaoyao Li tdx_cmd.data = (__u64)(unsigned long)data; 598eddedc3SXiaoyao Li 608eddedc3SXiaoyao Li switch (level) { 618eddedc3SXiaoyao Li case TDX_VM_IOCTL: 628eddedc3SXiaoyao Li r = kvm_vm_ioctl(kvm_state, KVM_MEMORY_ENCRYPT_OP, &tdx_cmd); 638eddedc3SXiaoyao Li break; 648eddedc3SXiaoyao Li case TDX_VCPU_IOCTL: 658eddedc3SXiaoyao Li r = kvm_vcpu_ioctl(state, KVM_MEMORY_ENCRYPT_OP, &tdx_cmd); 668eddedc3SXiaoyao Li break; 678eddedc3SXiaoyao Li default: 688eddedc3SXiaoyao Li error_setg(errp, "Invalid tdx_ioctl_level %d", level); 698eddedc3SXiaoyao Li return -EINVAL; 708eddedc3SXiaoyao Li } 718eddedc3SXiaoyao Li 728eddedc3SXiaoyao Li if (r < 0) { 738eddedc3SXiaoyao Li error_setg_errno(errp, -r, "TDX ioctl %s failed, hw_errors: 0x%llx", 748eddedc3SXiaoyao Li tdx_ioctl_name[cmd_id], tdx_cmd.hw_error); 758eddedc3SXiaoyao Li } 768eddedc3SXiaoyao Li return r; 778eddedc3SXiaoyao Li } 788eddedc3SXiaoyao Li 798eddedc3SXiaoyao Li static inline int tdx_vm_ioctl(int cmd_id, __u32 flags, void *data, 808eddedc3SXiaoyao Li Error **errp) 818eddedc3SXiaoyao Li { 828eddedc3SXiaoyao Li return tdx_ioctl_internal(TDX_VM_IOCTL, NULL, cmd_id, flags, data, errp); 838eddedc3SXiaoyao Li } 848eddedc3SXiaoyao Li 858eddedc3SXiaoyao Li static inline int tdx_vcpu_ioctl(CPUState *cpu, int cmd_id, __u32 flags, 868eddedc3SXiaoyao Li void *data, Error **errp) 878eddedc3SXiaoyao Li { 888eddedc3SXiaoyao Li return tdx_ioctl_internal(TDX_VCPU_IOCTL, cpu, cmd_id, flags, data, errp); 898eddedc3SXiaoyao Li } 908eddedc3SXiaoyao Li 918eddedc3SXiaoyao Li static int get_tdx_capabilities(Error **errp) 928eddedc3SXiaoyao Li { 938eddedc3SXiaoyao Li struct kvm_tdx_capabilities *caps; 948eddedc3SXiaoyao Li /* 1st generation of TDX reports 6 cpuid configs */ 958eddedc3SXiaoyao Li int nr_cpuid_configs = 6; 968eddedc3SXiaoyao Li size_t size; 978eddedc3SXiaoyao Li int r; 988eddedc3SXiaoyao Li 998eddedc3SXiaoyao Li do { 1008eddedc3SXiaoyao Li Error *local_err = NULL; 1018eddedc3SXiaoyao Li size = sizeof(struct kvm_tdx_capabilities) + 1028eddedc3SXiaoyao Li nr_cpuid_configs * sizeof(struct kvm_cpuid_entry2); 1038eddedc3SXiaoyao Li caps = g_malloc0(size); 1048eddedc3SXiaoyao Li caps->cpuid.nent = nr_cpuid_configs; 1058eddedc3SXiaoyao Li 1068eddedc3SXiaoyao Li r = tdx_vm_ioctl(KVM_TDX_CAPABILITIES, 0, caps, &local_err); 1078eddedc3SXiaoyao Li if (r == -E2BIG) { 1088eddedc3SXiaoyao Li g_free(caps); 1098eddedc3SXiaoyao Li nr_cpuid_configs *= 2; 1108eddedc3SXiaoyao Li if (nr_cpuid_configs > KVM_MAX_CPUID_ENTRIES) { 1118eddedc3SXiaoyao Li error_report("KVM TDX seems broken that number of CPUID entries" 1128eddedc3SXiaoyao Li " in kvm_tdx_capabilities exceeds limit: %d", 1138eddedc3SXiaoyao Li KVM_MAX_CPUID_ENTRIES); 1148eddedc3SXiaoyao Li error_propagate(errp, local_err); 1158eddedc3SXiaoyao Li return r; 1168eddedc3SXiaoyao Li } 1178eddedc3SXiaoyao Li error_free(local_err); 1188eddedc3SXiaoyao Li } else if (r < 0) { 1198eddedc3SXiaoyao Li g_free(caps); 1208eddedc3SXiaoyao Li error_propagate(errp, local_err); 1218eddedc3SXiaoyao Li return r; 1228eddedc3SXiaoyao Li } 1238eddedc3SXiaoyao Li } while (r == -E2BIG); 1248eddedc3SXiaoyao Li 1258eddedc3SXiaoyao Li tdx_caps = caps; 126631a2ac5SXiaoyao Li 127631a2ac5SXiaoyao Li return 0; 128631a2ac5SXiaoyao Li } 129631a2ac5SXiaoyao Li 1308eddedc3SXiaoyao Li static int tdx_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) 1318eddedc3SXiaoyao Li { 1321619d0e4SXiaoyao Li TdxGuest *tdx = TDX_GUEST(cgs); 1338eddedc3SXiaoyao Li int r = 0; 1348eddedc3SXiaoyao Li 1358eddedc3SXiaoyao Li kvm_mark_guest_state_protected(); 1368eddedc3SXiaoyao Li 1378eddedc3SXiaoyao Li if (!tdx_caps) { 1388eddedc3SXiaoyao Li r = get_tdx_capabilities(errp); 1391619d0e4SXiaoyao Li if (r) { 1401619d0e4SXiaoyao Li return r; 1411619d0e4SXiaoyao Li } 1428eddedc3SXiaoyao Li } 1438eddedc3SXiaoyao Li 1441619d0e4SXiaoyao Li tdx_guest = tdx; 1451619d0e4SXiaoyao Li return 0; 1468eddedc3SXiaoyao Li } 1478eddedc3SXiaoyao Li 148b455880eSXiaoyao Li static int tdx_kvm_type(X86ConfidentialGuest *cg) 149b455880eSXiaoyao Li { 150b455880eSXiaoyao Li /* Do the object check */ 151b455880eSXiaoyao Li TDX_GUEST(cg); 152b455880eSXiaoyao Li 153b455880eSXiaoyao Li return KVM_X86_TDX_VM; 154b455880eSXiaoyao Li } 155b455880eSXiaoyao Li 156*bb3be394SXiaoyao Li static void setup_td_guest_attributes(X86CPU *x86cpu) 157*bb3be394SXiaoyao Li { 158*bb3be394SXiaoyao Li CPUX86State *env = &x86cpu->env; 159*bb3be394SXiaoyao Li 160*bb3be394SXiaoyao Li tdx_guest->attributes |= (env->features[FEAT_7_0_ECX] & CPUID_7_0_ECX_PKS) ? 161*bb3be394SXiaoyao Li TDX_TD_ATTRIBUTES_PKS : 0; 162*bb3be394SXiaoyao Li tdx_guest->attributes |= x86cpu->enable_pmu ? TDX_TD_ATTRIBUTES_PERFMON : 0; 163*bb3be394SXiaoyao Li } 164*bb3be394SXiaoyao Li 165f15898b0SXiaoyao Li static int setup_td_xfam(X86CPU *x86cpu, Error **errp) 166f15898b0SXiaoyao Li { 167f15898b0SXiaoyao Li CPUX86State *env = &x86cpu->env; 168f15898b0SXiaoyao Li uint64_t xfam; 169f15898b0SXiaoyao Li 170f15898b0SXiaoyao Li xfam = env->features[FEAT_XSAVE_XCR0_LO] | 171f15898b0SXiaoyao Li env->features[FEAT_XSAVE_XCR0_HI] | 172f15898b0SXiaoyao Li env->features[FEAT_XSAVE_XSS_LO] | 173f15898b0SXiaoyao Li env->features[FEAT_XSAVE_XSS_HI]; 174f15898b0SXiaoyao Li 175f15898b0SXiaoyao Li if (xfam & ~tdx_caps->supported_xfam) { 176f15898b0SXiaoyao Li error_setg(errp, "Invalid XFAM 0x%lx for TDX VM (supported: 0x%llx))", 177f15898b0SXiaoyao Li xfam, tdx_caps->supported_xfam); 178f15898b0SXiaoyao Li return -1; 179f15898b0SXiaoyao Li } 180f15898b0SXiaoyao Li 181f15898b0SXiaoyao Li tdx_guest->xfam = xfam; 182f15898b0SXiaoyao Li return 0; 183f15898b0SXiaoyao Li } 184f15898b0SXiaoyao Li 185f15898b0SXiaoyao Li static void tdx_filter_cpuid(struct kvm_cpuid2 *cpuids) 186f15898b0SXiaoyao Li { 187f15898b0SXiaoyao Li int i, dest_cnt = 0; 188f15898b0SXiaoyao Li struct kvm_cpuid_entry2 *src, *dest, *conf; 189f15898b0SXiaoyao Li 190f15898b0SXiaoyao Li for (i = 0; i < cpuids->nent; i++) { 191f15898b0SXiaoyao Li src = cpuids->entries + i; 192f15898b0SXiaoyao Li conf = cpuid_find_entry(&tdx_caps->cpuid, src->function, src->index); 193f15898b0SXiaoyao Li if (!conf) { 194f15898b0SXiaoyao Li continue; 195f15898b0SXiaoyao Li } 196f15898b0SXiaoyao Li dest = cpuids->entries + dest_cnt; 197f15898b0SXiaoyao Li 198f15898b0SXiaoyao Li dest->function = src->function; 199f15898b0SXiaoyao Li dest->index = src->index; 200f15898b0SXiaoyao Li dest->flags = src->flags; 201f15898b0SXiaoyao Li dest->eax = src->eax & conf->eax; 202f15898b0SXiaoyao Li dest->ebx = src->ebx & conf->ebx; 203f15898b0SXiaoyao Li dest->ecx = src->ecx & conf->ecx; 204f15898b0SXiaoyao Li dest->edx = src->edx & conf->edx; 205f15898b0SXiaoyao Li 206f15898b0SXiaoyao Li dest_cnt++; 207f15898b0SXiaoyao Li } 208f15898b0SXiaoyao Li cpuids->nent = dest_cnt++; 209f15898b0SXiaoyao Li } 210f15898b0SXiaoyao Li 211f15898b0SXiaoyao Li int tdx_pre_create_vcpu(CPUState *cpu, Error **errp) 212f15898b0SXiaoyao Li { 213f15898b0SXiaoyao Li X86CPU *x86cpu = X86_CPU(cpu); 214f15898b0SXiaoyao Li CPUX86State *env = &x86cpu->env; 215f15898b0SXiaoyao Li g_autofree struct kvm_tdx_init_vm *init_vm = NULL; 216f15898b0SXiaoyao Li Error *local_err = NULL; 217f15898b0SXiaoyao Li int retry = 10000; 218f15898b0SXiaoyao Li int r = 0; 219f15898b0SXiaoyao Li 220f15898b0SXiaoyao Li QEMU_LOCK_GUARD(&tdx_guest->lock); 221f15898b0SXiaoyao Li if (tdx_guest->initialized) { 222f15898b0SXiaoyao Li return r; 223f15898b0SXiaoyao Li } 224f15898b0SXiaoyao Li 225f15898b0SXiaoyao Li init_vm = g_malloc0(sizeof(struct kvm_tdx_init_vm) + 226f15898b0SXiaoyao Li sizeof(struct kvm_cpuid_entry2) * KVM_MAX_CPUID_ENTRIES); 227f15898b0SXiaoyao Li 228*bb3be394SXiaoyao Li setup_td_guest_attributes(x86cpu); 229*bb3be394SXiaoyao Li 230f15898b0SXiaoyao Li r = setup_td_xfam(x86cpu, errp); 231f15898b0SXiaoyao Li if (r) { 232f15898b0SXiaoyao Li return r; 233f15898b0SXiaoyao Li } 234f15898b0SXiaoyao Li 235f15898b0SXiaoyao Li init_vm->cpuid.nent = kvm_x86_build_cpuid(env, init_vm->cpuid.entries, 0); 236f15898b0SXiaoyao Li tdx_filter_cpuid(&init_vm->cpuid); 237f15898b0SXiaoyao Li 238f15898b0SXiaoyao Li init_vm->attributes = tdx_guest->attributes; 239f15898b0SXiaoyao Li init_vm->xfam = tdx_guest->xfam; 240f15898b0SXiaoyao Li 241f15898b0SXiaoyao Li /* 242f15898b0SXiaoyao Li * KVM_TDX_INIT_VM gets -EAGAIN when KVM side SEAMCALL(TDH_MNG_CREATE) 243f15898b0SXiaoyao Li * gets TDX_RND_NO_ENTROPY due to Random number generation (e.g., RDRAND or 244f15898b0SXiaoyao Li * RDSEED) is busy. 245f15898b0SXiaoyao Li * 246f15898b0SXiaoyao Li * Retry for the case. 247f15898b0SXiaoyao Li */ 248f15898b0SXiaoyao Li do { 249f15898b0SXiaoyao Li error_free(local_err); 250f15898b0SXiaoyao Li local_err = NULL; 251f15898b0SXiaoyao Li r = tdx_vm_ioctl(KVM_TDX_INIT_VM, 0, init_vm, &local_err); 252f15898b0SXiaoyao Li } while (r == -EAGAIN && --retry); 253f15898b0SXiaoyao Li 254f15898b0SXiaoyao Li if (r < 0) { 255f15898b0SXiaoyao Li if (!retry) { 256f15898b0SXiaoyao Li error_append_hint(&local_err, "Hardware RNG (Random Number " 257f15898b0SXiaoyao Li "Generator) is busy occupied by someone (via RDRAND/RDSEED) " 258f15898b0SXiaoyao Li "maliciously, which leads to KVM_TDX_INIT_VM keeping failure " 259f15898b0SXiaoyao Li "due to lack of entropy.\n"); 260f15898b0SXiaoyao Li } 261f15898b0SXiaoyao Li error_propagate(errp, local_err); 262f15898b0SXiaoyao Li return r; 263f15898b0SXiaoyao Li } 264f15898b0SXiaoyao Li 265f15898b0SXiaoyao Li tdx_guest->initialized = true; 266f15898b0SXiaoyao Li 267f15898b0SXiaoyao Li return 0; 268f15898b0SXiaoyao Li } 269f15898b0SXiaoyao Li 2706016e297SXiaoyao Li static bool tdx_guest_get_sept_ve_disable(Object *obj, Error **errp) 2716016e297SXiaoyao Li { 2726016e297SXiaoyao Li TdxGuest *tdx = TDX_GUEST(obj); 2736016e297SXiaoyao Li 2746016e297SXiaoyao Li return !!(tdx->attributes & TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE); 2756016e297SXiaoyao Li } 2766016e297SXiaoyao Li 2776016e297SXiaoyao Li static void tdx_guest_set_sept_ve_disable(Object *obj, bool value, Error **errp) 2786016e297SXiaoyao Li { 2796016e297SXiaoyao Li TdxGuest *tdx = TDX_GUEST(obj); 2806016e297SXiaoyao Li 2816016e297SXiaoyao Li if (value) { 2826016e297SXiaoyao Li tdx->attributes |= TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE; 2836016e297SXiaoyao Li } else { 2846016e297SXiaoyao Li tdx->attributes &= ~TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE; 2856016e297SXiaoyao Li } 2866016e297SXiaoyao Li } 2876016e297SXiaoyao Li 288756e12e7SXiaoyao Li /* tdx guest */ 289756e12e7SXiaoyao Li OBJECT_DEFINE_TYPE_WITH_INTERFACES(TdxGuest, 290756e12e7SXiaoyao Li tdx_guest, 291756e12e7SXiaoyao Li TDX_GUEST, 292756e12e7SXiaoyao Li X86_CONFIDENTIAL_GUEST, 293756e12e7SXiaoyao Li { TYPE_USER_CREATABLE }, 294756e12e7SXiaoyao Li { NULL }) 295756e12e7SXiaoyao Li 296756e12e7SXiaoyao Li static void tdx_guest_init(Object *obj) 297756e12e7SXiaoyao Li { 298756e12e7SXiaoyao Li ConfidentialGuestSupport *cgs = CONFIDENTIAL_GUEST_SUPPORT(obj); 299756e12e7SXiaoyao Li TdxGuest *tdx = TDX_GUEST(obj); 300756e12e7SXiaoyao Li 301f15898b0SXiaoyao Li qemu_mutex_init(&tdx->lock); 302f15898b0SXiaoyao Li 303756e12e7SXiaoyao Li cgs->require_guest_memfd = true; 304714af522SIsaku Yamahata tdx->attributes = TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE; 305756e12e7SXiaoyao Li 306756e12e7SXiaoyao Li object_property_add_uint64_ptr(obj, "attributes", &tdx->attributes, 307756e12e7SXiaoyao Li OBJ_PROP_FLAG_READWRITE); 3086016e297SXiaoyao Li object_property_add_bool(obj, "sept-ve-disable", 3096016e297SXiaoyao Li tdx_guest_get_sept_ve_disable, 3106016e297SXiaoyao Li tdx_guest_set_sept_ve_disable); 311756e12e7SXiaoyao Li } 312756e12e7SXiaoyao Li 313756e12e7SXiaoyao Li static void tdx_guest_finalize(Object *obj) 314756e12e7SXiaoyao Li { 315756e12e7SXiaoyao Li } 316756e12e7SXiaoyao Li 317756e12e7SXiaoyao Li static void tdx_guest_class_init(ObjectClass *oc, const void *data) 318756e12e7SXiaoyao Li { 319631a2ac5SXiaoyao Li ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc); 320b455880eSXiaoyao Li X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc); 321b455880eSXiaoyao Li 322631a2ac5SXiaoyao Li klass->kvm_init = tdx_kvm_init; 323b455880eSXiaoyao Li x86_klass->kvm_type = tdx_kvm_type; 324756e12e7SXiaoyao Li } 325