xref: /qemu/target/i386/kvm/tdx.c (revision bb3be394cf80d68251e5b89e823dddc679b6e644)
1756e12e7SXiaoyao Li /*
2756e12e7SXiaoyao Li  * QEMU TDX support
3756e12e7SXiaoyao Li  *
4756e12e7SXiaoyao Li  * Copyright (c) 2025 Intel Corporation
5756e12e7SXiaoyao Li  *
6756e12e7SXiaoyao Li  * Author:
7756e12e7SXiaoyao Li  *      Xiaoyao Li <xiaoyao.li@intel.com>
8756e12e7SXiaoyao Li  *
9756e12e7SXiaoyao Li  * SPDX-License-Identifier: GPL-2.0-or-later
10756e12e7SXiaoyao Li  */
11756e12e7SXiaoyao Li 
12756e12e7SXiaoyao Li #include "qemu/osdep.h"
138eddedc3SXiaoyao Li #include "qemu/error-report.h"
148eddedc3SXiaoyao Li #include "qapi/error.h"
15756e12e7SXiaoyao Li #include "qom/object_interfaces.h"
16756e12e7SXiaoyao Li 
17631a2ac5SXiaoyao Li #include "hw/i386/x86.h"
18b455880eSXiaoyao Li #include "kvm_i386.h"
19756e12e7SXiaoyao Li #include "tdx.h"
20756e12e7SXiaoyao Li 
216016e297SXiaoyao Li #define TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE   BIT_ULL(28)
22*bb3be394SXiaoyao Li #define TDX_TD_ATTRIBUTES_PKS               BIT_ULL(30)
23*bb3be394SXiaoyao Li #define TDX_TD_ATTRIBUTES_PERFMON           BIT_ULL(63)
246016e297SXiaoyao Li 
251619d0e4SXiaoyao Li static TdxGuest *tdx_guest;
261619d0e4SXiaoyao Li 
278eddedc3SXiaoyao Li static struct kvm_tdx_capabilities *tdx_caps;
288eddedc3SXiaoyao Li 
291619d0e4SXiaoyao Li /* Valid after kvm_arch_init()->confidential_guest_kvm_init()->tdx_kvm_init() */
301619d0e4SXiaoyao Li bool is_tdx_vm(void)
311619d0e4SXiaoyao Li {
321619d0e4SXiaoyao Li     return !!tdx_guest;
331619d0e4SXiaoyao Li }
341619d0e4SXiaoyao Li 
/* Scope at which tdx_ioctl_internal() issues KVM_MEMORY_ENCRYPT_OP. */
enum tdx_ioctl_level {
    TDX_VM_IOCTL = 0,   /* routed through kvm_vm_ioctl() */
    TDX_VCPU_IOCTL,     /* routed through kvm_vcpu_ioctl() */
};
398eddedc3SXiaoyao Li 
408eddedc3SXiaoyao Li static int tdx_ioctl_internal(enum tdx_ioctl_level level, void *state,
418eddedc3SXiaoyao Li                               int cmd_id, __u32 flags, void *data,
428eddedc3SXiaoyao Li                               Error **errp)
43631a2ac5SXiaoyao Li {
448eddedc3SXiaoyao Li     struct kvm_tdx_cmd tdx_cmd = {};
458eddedc3SXiaoyao Li     int r;
468eddedc3SXiaoyao Li 
478eddedc3SXiaoyao Li     const char *tdx_ioctl_name[] = {
488eddedc3SXiaoyao Li         [KVM_TDX_CAPABILITIES] = "KVM_TDX_CAPABILITIES",
498eddedc3SXiaoyao Li         [KVM_TDX_INIT_VM] = "KVM_TDX_INIT_VM",
508eddedc3SXiaoyao Li         [KVM_TDX_INIT_VCPU] = "KVM_TDX_INIT_VCPU",
518eddedc3SXiaoyao Li         [KVM_TDX_INIT_MEM_REGION] = "KVM_TDX_INIT_MEM_REGION",
528eddedc3SXiaoyao Li         [KVM_TDX_FINALIZE_VM] = "KVM_TDX_FINALIZE_VM",
538eddedc3SXiaoyao Li         [KVM_TDX_GET_CPUID] = "KVM_TDX_GET_CPUID",
548eddedc3SXiaoyao Li     };
558eddedc3SXiaoyao Li 
568eddedc3SXiaoyao Li     tdx_cmd.id = cmd_id;
578eddedc3SXiaoyao Li     tdx_cmd.flags = flags;
588eddedc3SXiaoyao Li     tdx_cmd.data = (__u64)(unsigned long)data;
598eddedc3SXiaoyao Li 
608eddedc3SXiaoyao Li     switch (level) {
618eddedc3SXiaoyao Li     case TDX_VM_IOCTL:
628eddedc3SXiaoyao Li         r = kvm_vm_ioctl(kvm_state, KVM_MEMORY_ENCRYPT_OP, &tdx_cmd);
638eddedc3SXiaoyao Li         break;
648eddedc3SXiaoyao Li     case TDX_VCPU_IOCTL:
658eddedc3SXiaoyao Li         r = kvm_vcpu_ioctl(state, KVM_MEMORY_ENCRYPT_OP, &tdx_cmd);
668eddedc3SXiaoyao Li         break;
678eddedc3SXiaoyao Li     default:
688eddedc3SXiaoyao Li         error_setg(errp, "Invalid tdx_ioctl_level %d", level);
698eddedc3SXiaoyao Li         return -EINVAL;
708eddedc3SXiaoyao Li     }
718eddedc3SXiaoyao Li 
728eddedc3SXiaoyao Li     if (r < 0) {
738eddedc3SXiaoyao Li         error_setg_errno(errp, -r, "TDX ioctl %s failed, hw_errors: 0x%llx",
748eddedc3SXiaoyao Li                          tdx_ioctl_name[cmd_id], tdx_cmd.hw_error);
758eddedc3SXiaoyao Li     }
768eddedc3SXiaoyao Li     return r;
778eddedc3SXiaoyao Li }
788eddedc3SXiaoyao Li 
798eddedc3SXiaoyao Li static inline int tdx_vm_ioctl(int cmd_id, __u32 flags, void *data,
808eddedc3SXiaoyao Li                                Error **errp)
818eddedc3SXiaoyao Li {
828eddedc3SXiaoyao Li     return tdx_ioctl_internal(TDX_VM_IOCTL, NULL, cmd_id, flags, data, errp);
838eddedc3SXiaoyao Li }
848eddedc3SXiaoyao Li 
858eddedc3SXiaoyao Li static inline int tdx_vcpu_ioctl(CPUState *cpu, int cmd_id, __u32 flags,
868eddedc3SXiaoyao Li                                  void *data, Error **errp)
878eddedc3SXiaoyao Li {
888eddedc3SXiaoyao Li     return  tdx_ioctl_internal(TDX_VCPU_IOCTL, cpu, cmd_id, flags, data, errp);
898eddedc3SXiaoyao Li }
908eddedc3SXiaoyao Li 
918eddedc3SXiaoyao Li static int get_tdx_capabilities(Error **errp)
928eddedc3SXiaoyao Li {
938eddedc3SXiaoyao Li     struct kvm_tdx_capabilities *caps;
948eddedc3SXiaoyao Li     /* 1st generation of TDX reports 6 cpuid configs */
958eddedc3SXiaoyao Li     int nr_cpuid_configs = 6;
968eddedc3SXiaoyao Li     size_t size;
978eddedc3SXiaoyao Li     int r;
988eddedc3SXiaoyao Li 
998eddedc3SXiaoyao Li     do {
1008eddedc3SXiaoyao Li         Error *local_err = NULL;
1018eddedc3SXiaoyao Li         size = sizeof(struct kvm_tdx_capabilities) +
1028eddedc3SXiaoyao Li                       nr_cpuid_configs * sizeof(struct kvm_cpuid_entry2);
1038eddedc3SXiaoyao Li         caps = g_malloc0(size);
1048eddedc3SXiaoyao Li         caps->cpuid.nent = nr_cpuid_configs;
1058eddedc3SXiaoyao Li 
1068eddedc3SXiaoyao Li         r = tdx_vm_ioctl(KVM_TDX_CAPABILITIES, 0, caps, &local_err);
1078eddedc3SXiaoyao Li         if (r == -E2BIG) {
1088eddedc3SXiaoyao Li             g_free(caps);
1098eddedc3SXiaoyao Li             nr_cpuid_configs *= 2;
1108eddedc3SXiaoyao Li             if (nr_cpuid_configs > KVM_MAX_CPUID_ENTRIES) {
1118eddedc3SXiaoyao Li                 error_report("KVM TDX seems broken that number of CPUID entries"
1128eddedc3SXiaoyao Li                              " in kvm_tdx_capabilities exceeds limit: %d",
1138eddedc3SXiaoyao Li                              KVM_MAX_CPUID_ENTRIES);
1148eddedc3SXiaoyao Li                 error_propagate(errp, local_err);
1158eddedc3SXiaoyao Li                 return r;
1168eddedc3SXiaoyao Li             }
1178eddedc3SXiaoyao Li             error_free(local_err);
1188eddedc3SXiaoyao Li         } else if (r < 0) {
1198eddedc3SXiaoyao Li             g_free(caps);
1208eddedc3SXiaoyao Li             error_propagate(errp, local_err);
1218eddedc3SXiaoyao Li             return r;
1228eddedc3SXiaoyao Li         }
1238eddedc3SXiaoyao Li     } while (r == -E2BIG);
1248eddedc3SXiaoyao Li 
1258eddedc3SXiaoyao Li     tdx_caps = caps;
126631a2ac5SXiaoyao Li 
127631a2ac5SXiaoyao Li     return 0;
128631a2ac5SXiaoyao Li }
129631a2ac5SXiaoyao Li 
1308eddedc3SXiaoyao Li static int tdx_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
1318eddedc3SXiaoyao Li {
1321619d0e4SXiaoyao Li     TdxGuest *tdx = TDX_GUEST(cgs);
1338eddedc3SXiaoyao Li     int r = 0;
1348eddedc3SXiaoyao Li 
1358eddedc3SXiaoyao Li     kvm_mark_guest_state_protected();
1368eddedc3SXiaoyao Li 
1378eddedc3SXiaoyao Li     if (!tdx_caps) {
1388eddedc3SXiaoyao Li         r = get_tdx_capabilities(errp);
1391619d0e4SXiaoyao Li         if (r) {
1401619d0e4SXiaoyao Li             return r;
1411619d0e4SXiaoyao Li         }
1428eddedc3SXiaoyao Li     }
1438eddedc3SXiaoyao Li 
1441619d0e4SXiaoyao Li     tdx_guest = tdx;
1451619d0e4SXiaoyao Li     return 0;
1468eddedc3SXiaoyao Li }
1478eddedc3SXiaoyao Li 
148b455880eSXiaoyao Li static int tdx_kvm_type(X86ConfidentialGuest *cg)
149b455880eSXiaoyao Li {
150b455880eSXiaoyao Li     /* Do the object check */
151b455880eSXiaoyao Li     TDX_GUEST(cg);
152b455880eSXiaoyao Li 
153b455880eSXiaoyao Li     return KVM_X86_TDX_VM;
154b455880eSXiaoyao Li }
155b455880eSXiaoyao Li 
156*bb3be394SXiaoyao Li static void setup_td_guest_attributes(X86CPU *x86cpu)
157*bb3be394SXiaoyao Li {
158*bb3be394SXiaoyao Li     CPUX86State *env = &x86cpu->env;
159*bb3be394SXiaoyao Li 
160*bb3be394SXiaoyao Li     tdx_guest->attributes |= (env->features[FEAT_7_0_ECX] & CPUID_7_0_ECX_PKS) ?
161*bb3be394SXiaoyao Li                              TDX_TD_ATTRIBUTES_PKS : 0;
162*bb3be394SXiaoyao Li     tdx_guest->attributes |= x86cpu->enable_pmu ? TDX_TD_ATTRIBUTES_PERFMON : 0;
163*bb3be394SXiaoyao Li }
164*bb3be394SXiaoyao Li 
165f15898b0SXiaoyao Li static int setup_td_xfam(X86CPU *x86cpu, Error **errp)
166f15898b0SXiaoyao Li {
167f15898b0SXiaoyao Li     CPUX86State *env = &x86cpu->env;
168f15898b0SXiaoyao Li     uint64_t xfam;
169f15898b0SXiaoyao Li 
170f15898b0SXiaoyao Li     xfam = env->features[FEAT_XSAVE_XCR0_LO] |
171f15898b0SXiaoyao Li            env->features[FEAT_XSAVE_XCR0_HI] |
172f15898b0SXiaoyao Li            env->features[FEAT_XSAVE_XSS_LO] |
173f15898b0SXiaoyao Li            env->features[FEAT_XSAVE_XSS_HI];
174f15898b0SXiaoyao Li 
175f15898b0SXiaoyao Li     if (xfam & ~tdx_caps->supported_xfam) {
176f15898b0SXiaoyao Li         error_setg(errp, "Invalid XFAM 0x%lx for TDX VM (supported: 0x%llx))",
177f15898b0SXiaoyao Li                    xfam, tdx_caps->supported_xfam);
178f15898b0SXiaoyao Li         return -1;
179f15898b0SXiaoyao Li     }
180f15898b0SXiaoyao Li 
181f15898b0SXiaoyao Li     tdx_guest->xfam = xfam;
182f15898b0SXiaoyao Li     return 0;
183f15898b0SXiaoyao Li }
184f15898b0SXiaoyao Li 
185f15898b0SXiaoyao Li static void tdx_filter_cpuid(struct kvm_cpuid2 *cpuids)
186f15898b0SXiaoyao Li {
187f15898b0SXiaoyao Li     int i, dest_cnt = 0;
188f15898b0SXiaoyao Li     struct kvm_cpuid_entry2 *src, *dest, *conf;
189f15898b0SXiaoyao Li 
190f15898b0SXiaoyao Li     for (i = 0; i < cpuids->nent; i++) {
191f15898b0SXiaoyao Li         src = cpuids->entries + i;
192f15898b0SXiaoyao Li         conf = cpuid_find_entry(&tdx_caps->cpuid, src->function, src->index);
193f15898b0SXiaoyao Li         if (!conf) {
194f15898b0SXiaoyao Li             continue;
195f15898b0SXiaoyao Li         }
196f15898b0SXiaoyao Li         dest = cpuids->entries + dest_cnt;
197f15898b0SXiaoyao Li 
198f15898b0SXiaoyao Li         dest->function = src->function;
199f15898b0SXiaoyao Li         dest->index = src->index;
200f15898b0SXiaoyao Li         dest->flags = src->flags;
201f15898b0SXiaoyao Li         dest->eax = src->eax & conf->eax;
202f15898b0SXiaoyao Li         dest->ebx = src->ebx & conf->ebx;
203f15898b0SXiaoyao Li         dest->ecx = src->ecx & conf->ecx;
204f15898b0SXiaoyao Li         dest->edx = src->edx & conf->edx;
205f15898b0SXiaoyao Li 
206f15898b0SXiaoyao Li         dest_cnt++;
207f15898b0SXiaoyao Li     }
208f15898b0SXiaoyao Li     cpuids->nent = dest_cnt++;
209f15898b0SXiaoyao Li }
210f15898b0SXiaoyao Li 
211f15898b0SXiaoyao Li int tdx_pre_create_vcpu(CPUState *cpu, Error **errp)
212f15898b0SXiaoyao Li {
213f15898b0SXiaoyao Li     X86CPU *x86cpu = X86_CPU(cpu);
214f15898b0SXiaoyao Li     CPUX86State *env = &x86cpu->env;
215f15898b0SXiaoyao Li     g_autofree struct kvm_tdx_init_vm *init_vm = NULL;
216f15898b0SXiaoyao Li     Error *local_err = NULL;
217f15898b0SXiaoyao Li     int retry = 10000;
218f15898b0SXiaoyao Li     int r = 0;
219f15898b0SXiaoyao Li 
220f15898b0SXiaoyao Li     QEMU_LOCK_GUARD(&tdx_guest->lock);
221f15898b0SXiaoyao Li     if (tdx_guest->initialized) {
222f15898b0SXiaoyao Li         return r;
223f15898b0SXiaoyao Li     }
224f15898b0SXiaoyao Li 
225f15898b0SXiaoyao Li     init_vm = g_malloc0(sizeof(struct kvm_tdx_init_vm) +
226f15898b0SXiaoyao Li                         sizeof(struct kvm_cpuid_entry2) * KVM_MAX_CPUID_ENTRIES);
227f15898b0SXiaoyao Li 
228*bb3be394SXiaoyao Li     setup_td_guest_attributes(x86cpu);
229*bb3be394SXiaoyao Li 
230f15898b0SXiaoyao Li     r = setup_td_xfam(x86cpu, errp);
231f15898b0SXiaoyao Li     if (r) {
232f15898b0SXiaoyao Li         return r;
233f15898b0SXiaoyao Li     }
234f15898b0SXiaoyao Li 
235f15898b0SXiaoyao Li     init_vm->cpuid.nent = kvm_x86_build_cpuid(env, init_vm->cpuid.entries, 0);
236f15898b0SXiaoyao Li     tdx_filter_cpuid(&init_vm->cpuid);
237f15898b0SXiaoyao Li 
238f15898b0SXiaoyao Li     init_vm->attributes = tdx_guest->attributes;
239f15898b0SXiaoyao Li     init_vm->xfam = tdx_guest->xfam;
240f15898b0SXiaoyao Li 
241f15898b0SXiaoyao Li     /*
242f15898b0SXiaoyao Li      * KVM_TDX_INIT_VM gets -EAGAIN when KVM side SEAMCALL(TDH_MNG_CREATE)
243f15898b0SXiaoyao Li      * gets TDX_RND_NO_ENTROPY due to Random number generation (e.g., RDRAND or
244f15898b0SXiaoyao Li      * RDSEED) is busy.
245f15898b0SXiaoyao Li      *
246f15898b0SXiaoyao Li      * Retry for the case.
247f15898b0SXiaoyao Li      */
248f15898b0SXiaoyao Li     do {
249f15898b0SXiaoyao Li         error_free(local_err);
250f15898b0SXiaoyao Li         local_err = NULL;
251f15898b0SXiaoyao Li         r = tdx_vm_ioctl(KVM_TDX_INIT_VM, 0, init_vm, &local_err);
252f15898b0SXiaoyao Li     } while (r == -EAGAIN && --retry);
253f15898b0SXiaoyao Li 
254f15898b0SXiaoyao Li     if (r < 0) {
255f15898b0SXiaoyao Li         if (!retry) {
256f15898b0SXiaoyao Li             error_append_hint(&local_err, "Hardware RNG (Random Number "
257f15898b0SXiaoyao Li             "Generator) is busy occupied by someone (via RDRAND/RDSEED) "
258f15898b0SXiaoyao Li             "maliciously, which leads to KVM_TDX_INIT_VM keeping failure "
259f15898b0SXiaoyao Li             "due to lack of entropy.\n");
260f15898b0SXiaoyao Li         }
261f15898b0SXiaoyao Li         error_propagate(errp, local_err);
262f15898b0SXiaoyao Li         return r;
263f15898b0SXiaoyao Li     }
264f15898b0SXiaoyao Li 
265f15898b0SXiaoyao Li     tdx_guest->initialized = true;
266f15898b0SXiaoyao Li 
267f15898b0SXiaoyao Li     return 0;
268f15898b0SXiaoyao Li }
269f15898b0SXiaoyao Li 
2706016e297SXiaoyao Li static bool tdx_guest_get_sept_ve_disable(Object *obj, Error **errp)
2716016e297SXiaoyao Li {
2726016e297SXiaoyao Li     TdxGuest *tdx = TDX_GUEST(obj);
2736016e297SXiaoyao Li 
2746016e297SXiaoyao Li     return !!(tdx->attributes & TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE);
2756016e297SXiaoyao Li }
2766016e297SXiaoyao Li 
2776016e297SXiaoyao Li static void tdx_guest_set_sept_ve_disable(Object *obj, bool value, Error **errp)
2786016e297SXiaoyao Li {
2796016e297SXiaoyao Li     TdxGuest *tdx = TDX_GUEST(obj);
2806016e297SXiaoyao Li 
2816016e297SXiaoyao Li     if (value) {
2826016e297SXiaoyao Li         tdx->attributes |= TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE;
2836016e297SXiaoyao Li     } else {
2846016e297SXiaoyao Li         tdx->attributes &= ~TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE;
2856016e297SXiaoyao Li     }
2866016e297SXiaoyao Li }
2876016e297SXiaoyao Li 
288756e12e7SXiaoyao Li /* tdx guest */
289756e12e7SXiaoyao Li OBJECT_DEFINE_TYPE_WITH_INTERFACES(TdxGuest,
290756e12e7SXiaoyao Li                                    tdx_guest,
291756e12e7SXiaoyao Li                                    TDX_GUEST,
292756e12e7SXiaoyao Li                                    X86_CONFIDENTIAL_GUEST,
293756e12e7SXiaoyao Li                                    { TYPE_USER_CREATABLE },
294756e12e7SXiaoyao Li                                    { NULL })
295756e12e7SXiaoyao Li 
296756e12e7SXiaoyao Li static void tdx_guest_init(Object *obj)
297756e12e7SXiaoyao Li {
298756e12e7SXiaoyao Li     ConfidentialGuestSupport *cgs = CONFIDENTIAL_GUEST_SUPPORT(obj);
299756e12e7SXiaoyao Li     TdxGuest *tdx = TDX_GUEST(obj);
300756e12e7SXiaoyao Li 
301f15898b0SXiaoyao Li     qemu_mutex_init(&tdx->lock);
302f15898b0SXiaoyao Li 
303756e12e7SXiaoyao Li     cgs->require_guest_memfd = true;
304714af522SIsaku Yamahata     tdx->attributes = TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE;
305756e12e7SXiaoyao Li 
306756e12e7SXiaoyao Li     object_property_add_uint64_ptr(obj, "attributes", &tdx->attributes,
307756e12e7SXiaoyao Li                                    OBJ_PROP_FLAG_READWRITE);
3086016e297SXiaoyao Li     object_property_add_bool(obj, "sept-ve-disable",
3096016e297SXiaoyao Li                              tdx_guest_get_sept_ve_disable,
3106016e297SXiaoyao Li                              tdx_guest_set_sept_ve_disable);
311756e12e7SXiaoyao Li }
312756e12e7SXiaoyao Li 
313756e12e7SXiaoyao Li static void tdx_guest_finalize(Object *obj)
314756e12e7SXiaoyao Li {
315756e12e7SXiaoyao Li }
316756e12e7SXiaoyao Li 
317756e12e7SXiaoyao Li static void tdx_guest_class_init(ObjectClass *oc, const void *data)
318756e12e7SXiaoyao Li {
319631a2ac5SXiaoyao Li     ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc);
320b455880eSXiaoyao Li     X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc);
321b455880eSXiaoyao Li 
322631a2ac5SXiaoyao Li     klass->kvm_init = tdx_kvm_init;
323b455880eSXiaoyao Li     x86_klass->kvm_type = tdx_kvm_type;
324756e12e7SXiaoyao Li }
325