xref: /qemu/target/i386/kvm/tdx.c (revision a731425980a4d3f8bb96fc41893b6437672875ee)
1756e12e7SXiaoyao Li /*
2756e12e7SXiaoyao Li  * QEMU TDX support
3756e12e7SXiaoyao Li  *
4756e12e7SXiaoyao Li  * Copyright (c) 2025 Intel Corporation
5756e12e7SXiaoyao Li  *
6756e12e7SXiaoyao Li  * Author:
7756e12e7SXiaoyao Li  *      Xiaoyao Li <xiaoyao.li@intel.com>
8756e12e7SXiaoyao Li  *
9756e12e7SXiaoyao Li  * SPDX-License-Identifier: GPL-2.0-or-later
10756e12e7SXiaoyao Li  */
11756e12e7SXiaoyao Li 
12756e12e7SXiaoyao Li #include "qemu/osdep.h"
138eddedc3SXiaoyao Li #include "qemu/error-report.h"
14d05a0858SIsaku Yamahata #include "qemu/base64.h"
154420ba0eSXiaoyao Li #include "qemu/mmap-alloc.h"
168eddedc3SXiaoyao Li #include "qapi/error.h"
17756e12e7SXiaoyao Li #include "qom/object_interfaces.h"
18d05a0858SIsaku Yamahata #include "crypto/hash.h"
194420ba0eSXiaoyao Li #include "system/system.h"
20756e12e7SXiaoyao Li 
21f18672e4SXiaoyao Li #include "hw/i386/e820_memory_layout.h"
224420ba0eSXiaoyao Li #include "hw/i386/tdvf.h"
23631a2ac5SXiaoyao Li #include "hw/i386/x86.h"
24*a7314259SXiaoyao Li #include "hw/i386/tdvf-hob.h"
25b455880eSXiaoyao Li #include "kvm_i386.h"
26756e12e7SXiaoyao Li #include "tdx.h"
27756e12e7SXiaoyao Li 
280e73b843SXiaoyao Li #define TDX_MIN_TSC_FREQUENCY_KHZ   (100 * 1000)
290e73b843SXiaoyao Li #define TDX_MAX_TSC_FREQUENCY_KHZ   (10 * 1000 * 1000)
300e73b843SXiaoyao Li 
3153b6f406SXiaoyao Li #define TDX_TD_ATTRIBUTES_DEBUG             BIT_ULL(0)
326016e297SXiaoyao Li #define TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE   BIT_ULL(28)
33bb3be394SXiaoyao Li #define TDX_TD_ATTRIBUTES_PKS               BIT_ULL(30)
34bb3be394SXiaoyao Li #define TDX_TD_ATTRIBUTES_PERFMON           BIT_ULL(63)
356016e297SXiaoyao Li 
3653b6f406SXiaoyao Li #define TDX_SUPPORTED_TD_ATTRS  (TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE |\
3753b6f406SXiaoyao Li                                  TDX_TD_ATTRIBUTES_PKS | \
3853b6f406SXiaoyao Li                                  TDX_TD_ATTRIBUTES_PERFMON)
3953b6f406SXiaoyao Li 
401619d0e4SXiaoyao Li static TdxGuest *tdx_guest;
411619d0e4SXiaoyao Li 
428eddedc3SXiaoyao Li static struct kvm_tdx_capabilities *tdx_caps;
438eddedc3SXiaoyao Li 
441619d0e4SXiaoyao Li /* Valid after kvm_arch_init()->confidential_guest_kvm_init()->tdx_kvm_init() */
451619d0e4SXiaoyao Li bool is_tdx_vm(void)
461619d0e4SXiaoyao Li {
471619d0e4SXiaoyao Li     return !!tdx_guest;
481619d0e4SXiaoyao Li }
491619d0e4SXiaoyao Li 
/* Selects which KVM ioctl entry point a TDX command is sent through. */
enum tdx_ioctl_level {
    TDX_VM_IOCTL = 0,   /* issued with kvm_vm_ioctl() */
    TDX_VCPU_IOCTL = 1, /* issued with kvm_vcpu_ioctl() */
};
548eddedc3SXiaoyao Li 
558eddedc3SXiaoyao Li static int tdx_ioctl_internal(enum tdx_ioctl_level level, void *state,
568eddedc3SXiaoyao Li                               int cmd_id, __u32 flags, void *data,
578eddedc3SXiaoyao Li                               Error **errp)
58631a2ac5SXiaoyao Li {
598eddedc3SXiaoyao Li     struct kvm_tdx_cmd tdx_cmd = {};
608eddedc3SXiaoyao Li     int r;
618eddedc3SXiaoyao Li 
628eddedc3SXiaoyao Li     const char *tdx_ioctl_name[] = {
638eddedc3SXiaoyao Li         [KVM_TDX_CAPABILITIES] = "KVM_TDX_CAPABILITIES",
648eddedc3SXiaoyao Li         [KVM_TDX_INIT_VM] = "KVM_TDX_INIT_VM",
658eddedc3SXiaoyao Li         [KVM_TDX_INIT_VCPU] = "KVM_TDX_INIT_VCPU",
668eddedc3SXiaoyao Li         [KVM_TDX_INIT_MEM_REGION] = "KVM_TDX_INIT_MEM_REGION",
678eddedc3SXiaoyao Li         [KVM_TDX_FINALIZE_VM] = "KVM_TDX_FINALIZE_VM",
688eddedc3SXiaoyao Li         [KVM_TDX_GET_CPUID] = "KVM_TDX_GET_CPUID",
698eddedc3SXiaoyao Li     };
708eddedc3SXiaoyao Li 
718eddedc3SXiaoyao Li     tdx_cmd.id = cmd_id;
728eddedc3SXiaoyao Li     tdx_cmd.flags = flags;
738eddedc3SXiaoyao Li     tdx_cmd.data = (__u64)(unsigned long)data;
748eddedc3SXiaoyao Li 
758eddedc3SXiaoyao Li     switch (level) {
768eddedc3SXiaoyao Li     case TDX_VM_IOCTL:
778eddedc3SXiaoyao Li         r = kvm_vm_ioctl(kvm_state, KVM_MEMORY_ENCRYPT_OP, &tdx_cmd);
788eddedc3SXiaoyao Li         break;
798eddedc3SXiaoyao Li     case TDX_VCPU_IOCTL:
808eddedc3SXiaoyao Li         r = kvm_vcpu_ioctl(state, KVM_MEMORY_ENCRYPT_OP, &tdx_cmd);
818eddedc3SXiaoyao Li         break;
828eddedc3SXiaoyao Li     default:
838eddedc3SXiaoyao Li         error_setg(errp, "Invalid tdx_ioctl_level %d", level);
848eddedc3SXiaoyao Li         return -EINVAL;
858eddedc3SXiaoyao Li     }
868eddedc3SXiaoyao Li 
878eddedc3SXiaoyao Li     if (r < 0) {
888eddedc3SXiaoyao Li         error_setg_errno(errp, -r, "TDX ioctl %s failed, hw_errors: 0x%llx",
898eddedc3SXiaoyao Li                          tdx_ioctl_name[cmd_id], tdx_cmd.hw_error);
908eddedc3SXiaoyao Li     }
918eddedc3SXiaoyao Li     return r;
928eddedc3SXiaoyao Li }
938eddedc3SXiaoyao Li 
948eddedc3SXiaoyao Li static inline int tdx_vm_ioctl(int cmd_id, __u32 flags, void *data,
958eddedc3SXiaoyao Li                                Error **errp)
968eddedc3SXiaoyao Li {
978eddedc3SXiaoyao Li     return tdx_ioctl_internal(TDX_VM_IOCTL, NULL, cmd_id, flags, data, errp);
988eddedc3SXiaoyao Li }
998eddedc3SXiaoyao Li 
1008eddedc3SXiaoyao Li static inline int tdx_vcpu_ioctl(CPUState *cpu, int cmd_id, __u32 flags,
1018eddedc3SXiaoyao Li                                  void *data, Error **errp)
1028eddedc3SXiaoyao Li {
1038eddedc3SXiaoyao Li     return  tdx_ioctl_internal(TDX_VCPU_IOCTL, cpu, cmd_id, flags, data, errp);
1048eddedc3SXiaoyao Li }
1058eddedc3SXiaoyao Li 
1068eddedc3SXiaoyao Li static int get_tdx_capabilities(Error **errp)
1078eddedc3SXiaoyao Li {
1088eddedc3SXiaoyao Li     struct kvm_tdx_capabilities *caps;
1098eddedc3SXiaoyao Li     /* 1st generation of TDX reports 6 cpuid configs */
1108eddedc3SXiaoyao Li     int nr_cpuid_configs = 6;
1118eddedc3SXiaoyao Li     size_t size;
1128eddedc3SXiaoyao Li     int r;
1138eddedc3SXiaoyao Li 
1148eddedc3SXiaoyao Li     do {
1158eddedc3SXiaoyao Li         Error *local_err = NULL;
1168eddedc3SXiaoyao Li         size = sizeof(struct kvm_tdx_capabilities) +
1178eddedc3SXiaoyao Li                       nr_cpuid_configs * sizeof(struct kvm_cpuid_entry2);
1188eddedc3SXiaoyao Li         caps = g_malloc0(size);
1198eddedc3SXiaoyao Li         caps->cpuid.nent = nr_cpuid_configs;
1208eddedc3SXiaoyao Li 
1218eddedc3SXiaoyao Li         r = tdx_vm_ioctl(KVM_TDX_CAPABILITIES, 0, caps, &local_err);
1228eddedc3SXiaoyao Li         if (r == -E2BIG) {
1238eddedc3SXiaoyao Li             g_free(caps);
1248eddedc3SXiaoyao Li             nr_cpuid_configs *= 2;
1258eddedc3SXiaoyao Li             if (nr_cpuid_configs > KVM_MAX_CPUID_ENTRIES) {
1268eddedc3SXiaoyao Li                 error_report("KVM TDX seems broken that number of CPUID entries"
1278eddedc3SXiaoyao Li                              " in kvm_tdx_capabilities exceeds limit: %d",
1288eddedc3SXiaoyao Li                              KVM_MAX_CPUID_ENTRIES);
1298eddedc3SXiaoyao Li                 error_propagate(errp, local_err);
1308eddedc3SXiaoyao Li                 return r;
1318eddedc3SXiaoyao Li             }
1328eddedc3SXiaoyao Li             error_free(local_err);
1338eddedc3SXiaoyao Li         } else if (r < 0) {
1348eddedc3SXiaoyao Li             g_free(caps);
1358eddedc3SXiaoyao Li             error_propagate(errp, local_err);
1368eddedc3SXiaoyao Li             return r;
1378eddedc3SXiaoyao Li         }
1388eddedc3SXiaoyao Li     } while (r == -E2BIG);
1398eddedc3SXiaoyao Li 
1408eddedc3SXiaoyao Li     tdx_caps = caps;
141631a2ac5SXiaoyao Li 
142631a2ac5SXiaoyao Li     return 0;
143631a2ac5SXiaoyao Li }
144631a2ac5SXiaoyao Li 
1450dd5fe5eSChao Peng void tdx_set_tdvf_region(MemoryRegion *tdvf_mr)
1460dd5fe5eSChao Peng {
1470dd5fe5eSChao Peng     assert(!tdx_guest->tdvf_mr);
1480dd5fe5eSChao Peng     tdx_guest->tdvf_mr = tdvf_mr;
1490dd5fe5eSChao Peng }
1500dd5fe5eSChao Peng 
151*a7314259SXiaoyao Li static TdxFirmwareEntry *tdx_get_hob_entry(TdxGuest *tdx)
152*a7314259SXiaoyao Li {
153*a7314259SXiaoyao Li     TdxFirmwareEntry *entry;
154*a7314259SXiaoyao Li 
155*a7314259SXiaoyao Li     for_each_tdx_fw_entry(&tdx->tdvf, entry) {
156*a7314259SXiaoyao Li         if (entry->type == TDVF_SECTION_TYPE_TD_HOB) {
157*a7314259SXiaoyao Li             return entry;
158*a7314259SXiaoyao Li         }
159*a7314259SXiaoyao Li     }
160*a7314259SXiaoyao Li     error_report("TDVF metadata doesn't specify TD_HOB location.");
161*a7314259SXiaoyao Li     exit(1);
162*a7314259SXiaoyao Li }
163*a7314259SXiaoyao Li 
164f18672e4SXiaoyao Li static void tdx_add_ram_entry(uint64_t address, uint64_t length,
165f18672e4SXiaoyao Li                               enum TdxRamType type)
166f18672e4SXiaoyao Li {
167f18672e4SXiaoyao Li     uint32_t nr_entries = tdx_guest->nr_ram_entries;
168f18672e4SXiaoyao Li     tdx_guest->ram_entries = g_renew(TdxRamEntry, tdx_guest->ram_entries,
169f18672e4SXiaoyao Li                                      nr_entries + 1);
170f18672e4SXiaoyao Li 
171f18672e4SXiaoyao Li     tdx_guest->ram_entries[nr_entries].address = address;
172f18672e4SXiaoyao Li     tdx_guest->ram_entries[nr_entries].length = length;
173f18672e4SXiaoyao Li     tdx_guest->ram_entries[nr_entries].type = type;
174f18672e4SXiaoyao Li     tdx_guest->nr_ram_entries++;
175f18672e4SXiaoyao Li }
176f18672e4SXiaoyao Li 
177f18672e4SXiaoyao Li static int tdx_accept_ram_range(uint64_t address, uint64_t length)
178f18672e4SXiaoyao Li {
179f18672e4SXiaoyao Li     uint64_t head_start, tail_start, head_length, tail_length;
180f18672e4SXiaoyao Li     uint64_t tmp_address, tmp_length;
181f18672e4SXiaoyao Li     TdxRamEntry *e;
182f18672e4SXiaoyao Li     int i = 0;
183f18672e4SXiaoyao Li 
184f18672e4SXiaoyao Li     do {
185f18672e4SXiaoyao Li         if (i == tdx_guest->nr_ram_entries) {
186f18672e4SXiaoyao Li             return -1;
187f18672e4SXiaoyao Li         }
188f18672e4SXiaoyao Li 
189f18672e4SXiaoyao Li         e = &tdx_guest->ram_entries[i++];
190f18672e4SXiaoyao Li     } while (address + length <= e->address || address >= e->address + e->length);
191f18672e4SXiaoyao Li 
192f18672e4SXiaoyao Li     /*
193f18672e4SXiaoyao Li      * The to-be-accepted ram range must be fully contained by one
194f18672e4SXiaoyao Li      * RAM entry.
195f18672e4SXiaoyao Li      */
196f18672e4SXiaoyao Li     if (e->address > address ||
197f18672e4SXiaoyao Li         e->address + e->length < address + length) {
198f18672e4SXiaoyao Li         return -1;
199f18672e4SXiaoyao Li     }
200f18672e4SXiaoyao Li 
201f18672e4SXiaoyao Li     if (e->type == TDX_RAM_ADDED) {
202f18672e4SXiaoyao Li         return 0;
203f18672e4SXiaoyao Li     }
204f18672e4SXiaoyao Li 
205f18672e4SXiaoyao Li     tmp_address = e->address;
206f18672e4SXiaoyao Li     tmp_length = e->length;
207f18672e4SXiaoyao Li 
208f18672e4SXiaoyao Li     e->address = address;
209f18672e4SXiaoyao Li     e->length = length;
210f18672e4SXiaoyao Li     e->type = TDX_RAM_ADDED;
211f18672e4SXiaoyao Li 
212f18672e4SXiaoyao Li     head_length = address - tmp_address;
213f18672e4SXiaoyao Li     if (head_length > 0) {
214f18672e4SXiaoyao Li         head_start = tmp_address;
215f18672e4SXiaoyao Li         tdx_add_ram_entry(head_start, head_length, TDX_RAM_UNACCEPTED);
216f18672e4SXiaoyao Li     }
217f18672e4SXiaoyao Li 
218f18672e4SXiaoyao Li     tail_start = address + length;
219f18672e4SXiaoyao Li     if (tail_start < tmp_address + tmp_length) {
220f18672e4SXiaoyao Li         tail_length = tmp_address + tmp_length - tail_start;
221f18672e4SXiaoyao Li         tdx_add_ram_entry(tail_start, tail_length, TDX_RAM_UNACCEPTED);
222f18672e4SXiaoyao Li     }
223f18672e4SXiaoyao Li 
224f18672e4SXiaoyao Li     return 0;
225f18672e4SXiaoyao Li }
226f18672e4SXiaoyao Li 
227f18672e4SXiaoyao Li static int tdx_ram_entry_compare(const void *lhs_, const void* rhs_)
228f18672e4SXiaoyao Li {
229f18672e4SXiaoyao Li     const TdxRamEntry *lhs = lhs_;
230f18672e4SXiaoyao Li     const TdxRamEntry *rhs = rhs_;
231f18672e4SXiaoyao Li 
232f18672e4SXiaoyao Li     if (lhs->address == rhs->address) {
233f18672e4SXiaoyao Li         return 0;
234f18672e4SXiaoyao Li     }
235f18672e4SXiaoyao Li     if (le64_to_cpu(lhs->address) > le64_to_cpu(rhs->address)) {
236f18672e4SXiaoyao Li         return 1;
237f18672e4SXiaoyao Li     }
238f18672e4SXiaoyao Li     return -1;
239f18672e4SXiaoyao Li }
240f18672e4SXiaoyao Li 
241f18672e4SXiaoyao Li static void tdx_init_ram_entries(void)
242f18672e4SXiaoyao Li {
243f18672e4SXiaoyao Li     unsigned i, j, nr_e820_entries;
244f18672e4SXiaoyao Li 
245f18672e4SXiaoyao Li     nr_e820_entries = e820_get_table(NULL);
246f18672e4SXiaoyao Li     tdx_guest->ram_entries = g_new(TdxRamEntry, nr_e820_entries);
247f18672e4SXiaoyao Li 
248f18672e4SXiaoyao Li     for (i = 0, j = 0; i < nr_e820_entries; i++) {
249f18672e4SXiaoyao Li         uint64_t addr, len;
250f18672e4SXiaoyao Li 
251f18672e4SXiaoyao Li         if (e820_get_entry(i, E820_RAM, &addr, &len)) {
252f18672e4SXiaoyao Li             tdx_guest->ram_entries[j].address = addr;
253f18672e4SXiaoyao Li             tdx_guest->ram_entries[j].length = len;
254f18672e4SXiaoyao Li             tdx_guest->ram_entries[j].type = TDX_RAM_UNACCEPTED;
255f18672e4SXiaoyao Li             j++;
256f18672e4SXiaoyao Li         }
257f18672e4SXiaoyao Li     }
258f18672e4SXiaoyao Li     tdx_guest->nr_ram_entries = j;
259f18672e4SXiaoyao Li }
260f18672e4SXiaoyao Li 
2614420ba0eSXiaoyao Li static void tdx_finalize_vm(Notifier *notifier, void *unused)
2624420ba0eSXiaoyao Li {
2634420ba0eSXiaoyao Li     TdxFirmware *tdvf = &tdx_guest->tdvf;
2644420ba0eSXiaoyao Li     TdxFirmwareEntry *entry;
2654420ba0eSXiaoyao Li 
266f18672e4SXiaoyao Li     tdx_init_ram_entries();
267f18672e4SXiaoyao Li 
2684420ba0eSXiaoyao Li     for_each_tdx_fw_entry(tdvf, entry) {
2694420ba0eSXiaoyao Li         switch (entry->type) {
2704420ba0eSXiaoyao Li         case TDVF_SECTION_TYPE_BFV:
2714420ba0eSXiaoyao Li         case TDVF_SECTION_TYPE_CFV:
2724420ba0eSXiaoyao Li             entry->mem_ptr = tdvf->mem_ptr + entry->data_offset;
2734420ba0eSXiaoyao Li             break;
2744420ba0eSXiaoyao Li         case TDVF_SECTION_TYPE_TD_HOB:
2754420ba0eSXiaoyao Li         case TDVF_SECTION_TYPE_TEMP_MEM:
2764420ba0eSXiaoyao Li             entry->mem_ptr = qemu_ram_mmap(-1, entry->size,
2774420ba0eSXiaoyao Li                                            qemu_real_host_page_size(), 0, 0);
2784420ba0eSXiaoyao Li             if (entry->mem_ptr == MAP_FAILED) {
2794420ba0eSXiaoyao Li                 error_report("Failed to mmap memory for TDVF section %d",
2804420ba0eSXiaoyao Li                              entry->type);
2814420ba0eSXiaoyao Li                 exit(1);
2824420ba0eSXiaoyao Li             }
283f18672e4SXiaoyao Li             if (tdx_accept_ram_range(entry->address, entry->size)) {
284f18672e4SXiaoyao Li                 error_report("Failed to accept memory for TDVF section %d",
285f18672e4SXiaoyao Li                              entry->type);
286f18672e4SXiaoyao Li                 qemu_ram_munmap(-1, entry->mem_ptr, entry->size);
287f18672e4SXiaoyao Li                 exit(1);
288f18672e4SXiaoyao Li             }
2894420ba0eSXiaoyao Li             break;
2904420ba0eSXiaoyao Li         default:
2914420ba0eSXiaoyao Li             error_report("Unsupported TDVF section %d", entry->type);
2924420ba0eSXiaoyao Li             exit(1);
2934420ba0eSXiaoyao Li         }
2944420ba0eSXiaoyao Li     }
295f18672e4SXiaoyao Li 
296f18672e4SXiaoyao Li     qsort(tdx_guest->ram_entries, tdx_guest->nr_ram_entries,
297f18672e4SXiaoyao Li           sizeof(TdxRamEntry), &tdx_ram_entry_compare);
298*a7314259SXiaoyao Li 
299*a7314259SXiaoyao Li     tdvf_hob_create(tdx_guest, tdx_get_hob_entry(tdx_guest));
3004420ba0eSXiaoyao Li }
3014420ba0eSXiaoyao Li 
3024420ba0eSXiaoyao Li static Notifier tdx_machine_done_notify = {
3034420ba0eSXiaoyao Li     .notify = tdx_finalize_vm,
3044420ba0eSXiaoyao Li };
3054420ba0eSXiaoyao Li 
3068eddedc3SXiaoyao Li static int tdx_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
3078eddedc3SXiaoyao Li {
3081619d0e4SXiaoyao Li     TdxGuest *tdx = TDX_GUEST(cgs);
3098eddedc3SXiaoyao Li     int r = 0;
3108eddedc3SXiaoyao Li 
3118eddedc3SXiaoyao Li     kvm_mark_guest_state_protected();
3128eddedc3SXiaoyao Li 
3138eddedc3SXiaoyao Li     if (!tdx_caps) {
3148eddedc3SXiaoyao Li         r = get_tdx_capabilities(errp);
3151619d0e4SXiaoyao Li         if (r) {
3161619d0e4SXiaoyao Li             return r;
3171619d0e4SXiaoyao Li         }
3188eddedc3SXiaoyao Li     }
3198eddedc3SXiaoyao Li 
3204420ba0eSXiaoyao Li     qemu_add_machine_init_done_notifier(&tdx_machine_done_notify);
3214420ba0eSXiaoyao Li 
3221619d0e4SXiaoyao Li     tdx_guest = tdx;
3231619d0e4SXiaoyao Li     return 0;
3248eddedc3SXiaoyao Li }
3258eddedc3SXiaoyao Li 
326b455880eSXiaoyao Li static int tdx_kvm_type(X86ConfidentialGuest *cg)
327b455880eSXiaoyao Li {
328b455880eSXiaoyao Li     /* Do the object check */
329b455880eSXiaoyao Li     TDX_GUEST(cg);
330b455880eSXiaoyao Li 
331b455880eSXiaoyao Li     return KVM_X86_TDX_VM;
332b455880eSXiaoyao Li }
333b455880eSXiaoyao Li 
33453b6f406SXiaoyao Li static int tdx_validate_attributes(TdxGuest *tdx, Error **errp)
33553b6f406SXiaoyao Li {
33653b6f406SXiaoyao Li     if ((tdx->attributes & ~tdx_caps->supported_attrs)) {
33753b6f406SXiaoyao Li         error_setg(errp, "Invalid attributes 0x%lx for TDX VM "
33853b6f406SXiaoyao Li                    "(KVM supported: 0x%llx)", tdx->attributes,
33953b6f406SXiaoyao Li                    tdx_caps->supported_attrs);
34053b6f406SXiaoyao Li         return -1;
34153b6f406SXiaoyao Li     }
34253b6f406SXiaoyao Li 
34353b6f406SXiaoyao Li     if (tdx->attributes & ~TDX_SUPPORTED_TD_ATTRS) {
34453b6f406SXiaoyao Li         error_setg(errp, "Some QEMU unsupported TD attribute bits being "
34553b6f406SXiaoyao Li                     "requested: 0x%lx (QEMU supported: 0x%llx)",
34653b6f406SXiaoyao Li                     tdx->attributes, TDX_SUPPORTED_TD_ATTRS);
34753b6f406SXiaoyao Li         return -1;
34853b6f406SXiaoyao Li     }
34953b6f406SXiaoyao Li 
35053b6f406SXiaoyao Li     return 0;
35153b6f406SXiaoyao Li }
35253b6f406SXiaoyao Li 
35353b6f406SXiaoyao Li static int setup_td_guest_attributes(X86CPU *x86cpu, Error **errp)
354bb3be394SXiaoyao Li {
355bb3be394SXiaoyao Li     CPUX86State *env = &x86cpu->env;
356bb3be394SXiaoyao Li 
357bb3be394SXiaoyao Li     tdx_guest->attributes |= (env->features[FEAT_7_0_ECX] & CPUID_7_0_ECX_PKS) ?
358bb3be394SXiaoyao Li                              TDX_TD_ATTRIBUTES_PKS : 0;
359bb3be394SXiaoyao Li     tdx_guest->attributes |= x86cpu->enable_pmu ? TDX_TD_ATTRIBUTES_PERFMON : 0;
36053b6f406SXiaoyao Li 
36153b6f406SXiaoyao Li     return tdx_validate_attributes(tdx_guest, errp);
362bb3be394SXiaoyao Li }
363bb3be394SXiaoyao Li 
364f15898b0SXiaoyao Li static int setup_td_xfam(X86CPU *x86cpu, Error **errp)
365f15898b0SXiaoyao Li {
366f15898b0SXiaoyao Li     CPUX86State *env = &x86cpu->env;
367f15898b0SXiaoyao Li     uint64_t xfam;
368f15898b0SXiaoyao Li 
369f15898b0SXiaoyao Li     xfam = env->features[FEAT_XSAVE_XCR0_LO] |
370f15898b0SXiaoyao Li            env->features[FEAT_XSAVE_XCR0_HI] |
371f15898b0SXiaoyao Li            env->features[FEAT_XSAVE_XSS_LO] |
372f15898b0SXiaoyao Li            env->features[FEAT_XSAVE_XSS_HI];
373f15898b0SXiaoyao Li 
374f15898b0SXiaoyao Li     if (xfam & ~tdx_caps->supported_xfam) {
375f15898b0SXiaoyao Li         error_setg(errp, "Invalid XFAM 0x%lx for TDX VM (supported: 0x%llx))",
376f15898b0SXiaoyao Li                    xfam, tdx_caps->supported_xfam);
377f15898b0SXiaoyao Li         return -1;
378f15898b0SXiaoyao Li     }
379f15898b0SXiaoyao Li 
380f15898b0SXiaoyao Li     tdx_guest->xfam = xfam;
381f15898b0SXiaoyao Li     return 0;
382f15898b0SXiaoyao Li }
383f15898b0SXiaoyao Li 
384f15898b0SXiaoyao Li static void tdx_filter_cpuid(struct kvm_cpuid2 *cpuids)
385f15898b0SXiaoyao Li {
386f15898b0SXiaoyao Li     int i, dest_cnt = 0;
387f15898b0SXiaoyao Li     struct kvm_cpuid_entry2 *src, *dest, *conf;
388f15898b0SXiaoyao Li 
389f15898b0SXiaoyao Li     for (i = 0; i < cpuids->nent; i++) {
390f15898b0SXiaoyao Li         src = cpuids->entries + i;
391f15898b0SXiaoyao Li         conf = cpuid_find_entry(&tdx_caps->cpuid, src->function, src->index);
392f15898b0SXiaoyao Li         if (!conf) {
393f15898b0SXiaoyao Li             continue;
394f15898b0SXiaoyao Li         }
395f15898b0SXiaoyao Li         dest = cpuids->entries + dest_cnt;
396f15898b0SXiaoyao Li 
397f15898b0SXiaoyao Li         dest->function = src->function;
398f15898b0SXiaoyao Li         dest->index = src->index;
399f15898b0SXiaoyao Li         dest->flags = src->flags;
400f15898b0SXiaoyao Li         dest->eax = src->eax & conf->eax;
401f15898b0SXiaoyao Li         dest->ebx = src->ebx & conf->ebx;
402f15898b0SXiaoyao Li         dest->ecx = src->ecx & conf->ecx;
403f15898b0SXiaoyao Li         dest->edx = src->edx & conf->edx;
404f15898b0SXiaoyao Li 
405f15898b0SXiaoyao Li         dest_cnt++;
406f15898b0SXiaoyao Li     }
407f15898b0SXiaoyao Li     cpuids->nent = dest_cnt++;
408f15898b0SXiaoyao Li }
409f15898b0SXiaoyao Li 
410f15898b0SXiaoyao Li int tdx_pre_create_vcpu(CPUState *cpu, Error **errp)
411f15898b0SXiaoyao Li {
412f15898b0SXiaoyao Li     X86CPU *x86cpu = X86_CPU(cpu);
413f15898b0SXiaoyao Li     CPUX86State *env = &x86cpu->env;
414f15898b0SXiaoyao Li     g_autofree struct kvm_tdx_init_vm *init_vm = NULL;
415f15898b0SXiaoyao Li     Error *local_err = NULL;
416d05a0858SIsaku Yamahata     size_t data_len;
417f15898b0SXiaoyao Li     int retry = 10000;
418f15898b0SXiaoyao Li     int r = 0;
419f15898b0SXiaoyao Li 
420f15898b0SXiaoyao Li     QEMU_LOCK_GUARD(&tdx_guest->lock);
421f15898b0SXiaoyao Li     if (tdx_guest->initialized) {
422f15898b0SXiaoyao Li         return r;
423f15898b0SXiaoyao Li     }
424f15898b0SXiaoyao Li 
425f15898b0SXiaoyao Li     init_vm = g_malloc0(sizeof(struct kvm_tdx_init_vm) +
426f15898b0SXiaoyao Li                         sizeof(struct kvm_cpuid_entry2) * KVM_MAX_CPUID_ENTRIES);
427f15898b0SXiaoyao Li 
428d529a2acSXiaoyao Li     if (!kvm_check_extension(kvm_state, KVM_CAP_X86_APIC_BUS_CYCLES_NS)) {
429d529a2acSXiaoyao Li         error_setg(errp, "KVM doesn't support KVM_CAP_X86_APIC_BUS_CYCLES_NS");
430d529a2acSXiaoyao Li         return -EOPNOTSUPP;
431d529a2acSXiaoyao Li     }
432d529a2acSXiaoyao Li 
433d529a2acSXiaoyao Li     r = kvm_vm_enable_cap(kvm_state, KVM_CAP_X86_APIC_BUS_CYCLES_NS,
434d529a2acSXiaoyao Li                           0, TDX_APIC_BUS_CYCLES_NS);
435d529a2acSXiaoyao Li     if (r < 0) {
436d529a2acSXiaoyao Li         error_setg_errno(errp, -r,
437d529a2acSXiaoyao Li                          "Unable to set core crystal clock frequency to 25MHz");
438d529a2acSXiaoyao Li         return r;
439d529a2acSXiaoyao Li     }
440d529a2acSXiaoyao Li 
4410e73b843SXiaoyao Li     if (env->tsc_khz && (env->tsc_khz < TDX_MIN_TSC_FREQUENCY_KHZ ||
4420e73b843SXiaoyao Li                          env->tsc_khz > TDX_MAX_TSC_FREQUENCY_KHZ)) {
4430e73b843SXiaoyao Li         error_setg(errp, "Invalid TSC %ld KHz, must specify cpu_frequency "
4440e73b843SXiaoyao Li                          "between [%d, %d] kHz", env->tsc_khz,
4450e73b843SXiaoyao Li                          TDX_MIN_TSC_FREQUENCY_KHZ, TDX_MAX_TSC_FREQUENCY_KHZ);
4460e73b843SXiaoyao Li        return -EINVAL;
4470e73b843SXiaoyao Li     }
4480e73b843SXiaoyao Li 
4490e73b843SXiaoyao Li     if (env->tsc_khz % (25 * 1000)) {
4500e73b843SXiaoyao Li         error_setg(errp, "Invalid TSC %ld KHz, it must be multiple of 25MHz",
4510e73b843SXiaoyao Li                    env->tsc_khz);
4520e73b843SXiaoyao Li         return -EINVAL;
4530e73b843SXiaoyao Li     }
4540e73b843SXiaoyao Li 
4550e73b843SXiaoyao Li     /* it's safe even env->tsc_khz is 0. KVM uses host's tsc_khz in this case */
4560e73b843SXiaoyao Li     r = kvm_vm_ioctl(kvm_state, KVM_SET_TSC_KHZ, env->tsc_khz);
4570e73b843SXiaoyao Li     if (r < 0) {
4580e73b843SXiaoyao Li         error_setg_errno(errp, -r, "Unable to set TSC frequency to %ld kHz",
4590e73b843SXiaoyao Li                          env->tsc_khz);
4600e73b843SXiaoyao Li         return r;
4610e73b843SXiaoyao Li     }
4620e73b843SXiaoyao Li 
463d05a0858SIsaku Yamahata     if (tdx_guest->mrconfigid) {
464d05a0858SIsaku Yamahata         g_autofree uint8_t *data = qbase64_decode(tdx_guest->mrconfigid,
465d05a0858SIsaku Yamahata                               strlen(tdx_guest->mrconfigid), &data_len, errp);
466d05a0858SIsaku Yamahata         if (!data) {
467d05a0858SIsaku Yamahata             return -1;
468d05a0858SIsaku Yamahata         }
469d05a0858SIsaku Yamahata         if (data_len != QCRYPTO_HASH_DIGEST_LEN_SHA384) {
470d05a0858SIsaku Yamahata             error_setg(errp, "TDX: failed to decode mrconfigid");
471d05a0858SIsaku Yamahata             return -1;
472d05a0858SIsaku Yamahata         }
473d05a0858SIsaku Yamahata         memcpy(init_vm->mrconfigid, data, data_len);
474d05a0858SIsaku Yamahata     }
475d05a0858SIsaku Yamahata 
476d05a0858SIsaku Yamahata     if (tdx_guest->mrowner) {
477d05a0858SIsaku Yamahata         g_autofree uint8_t *data = qbase64_decode(tdx_guest->mrowner,
478d05a0858SIsaku Yamahata                               strlen(tdx_guest->mrowner), &data_len, errp);
479d05a0858SIsaku Yamahata         if (!data) {
480d05a0858SIsaku Yamahata             return -1;
481d05a0858SIsaku Yamahata         }
482d05a0858SIsaku Yamahata         if (data_len != QCRYPTO_HASH_DIGEST_LEN_SHA384) {
483d05a0858SIsaku Yamahata             error_setg(errp, "TDX: failed to decode mrowner");
484d05a0858SIsaku Yamahata             return -1;
485d05a0858SIsaku Yamahata         }
486d05a0858SIsaku Yamahata         memcpy(init_vm->mrowner, data, data_len);
487d05a0858SIsaku Yamahata     }
488d05a0858SIsaku Yamahata 
489d05a0858SIsaku Yamahata     if (tdx_guest->mrownerconfig) {
490d05a0858SIsaku Yamahata         g_autofree uint8_t *data = qbase64_decode(tdx_guest->mrownerconfig,
491d05a0858SIsaku Yamahata                             strlen(tdx_guest->mrownerconfig), &data_len, errp);
492d05a0858SIsaku Yamahata         if (!data) {
493d05a0858SIsaku Yamahata             return -1;
494d05a0858SIsaku Yamahata         }
495d05a0858SIsaku Yamahata         if (data_len != QCRYPTO_HASH_DIGEST_LEN_SHA384) {
496d05a0858SIsaku Yamahata             error_setg(errp, "TDX: failed to decode mrownerconfig");
497d05a0858SIsaku Yamahata             return -1;
498d05a0858SIsaku Yamahata         }
499d05a0858SIsaku Yamahata         memcpy(init_vm->mrownerconfig, data, data_len);
500d05a0858SIsaku Yamahata     }
501d05a0858SIsaku Yamahata 
50253b6f406SXiaoyao Li     r = setup_td_guest_attributes(x86cpu, errp);
50353b6f406SXiaoyao Li     if (r) {
50453b6f406SXiaoyao Li         return r;
50553b6f406SXiaoyao Li     }
506bb3be394SXiaoyao Li 
507f15898b0SXiaoyao Li     r = setup_td_xfam(x86cpu, errp);
508f15898b0SXiaoyao Li     if (r) {
509f15898b0SXiaoyao Li         return r;
510f15898b0SXiaoyao Li     }
511f15898b0SXiaoyao Li 
512f15898b0SXiaoyao Li     init_vm->cpuid.nent = kvm_x86_build_cpuid(env, init_vm->cpuid.entries, 0);
513f15898b0SXiaoyao Li     tdx_filter_cpuid(&init_vm->cpuid);
514f15898b0SXiaoyao Li 
515f15898b0SXiaoyao Li     init_vm->attributes = tdx_guest->attributes;
516f15898b0SXiaoyao Li     init_vm->xfam = tdx_guest->xfam;
517f15898b0SXiaoyao Li 
518f15898b0SXiaoyao Li     /*
519f15898b0SXiaoyao Li      * KVM_TDX_INIT_VM gets -EAGAIN when KVM side SEAMCALL(TDH_MNG_CREATE)
520f15898b0SXiaoyao Li      * gets TDX_RND_NO_ENTROPY due to Random number generation (e.g., RDRAND or
521f15898b0SXiaoyao Li      * RDSEED) is busy.
522f15898b0SXiaoyao Li      *
523f15898b0SXiaoyao Li      * Retry for the case.
524f15898b0SXiaoyao Li      */
525f15898b0SXiaoyao Li     do {
526f15898b0SXiaoyao Li         error_free(local_err);
527f15898b0SXiaoyao Li         local_err = NULL;
528f15898b0SXiaoyao Li         r = tdx_vm_ioctl(KVM_TDX_INIT_VM, 0, init_vm, &local_err);
529f15898b0SXiaoyao Li     } while (r == -EAGAIN && --retry);
530f15898b0SXiaoyao Li 
531f15898b0SXiaoyao Li     if (r < 0) {
532f15898b0SXiaoyao Li         if (!retry) {
533f15898b0SXiaoyao Li             error_append_hint(&local_err, "Hardware RNG (Random Number "
534f15898b0SXiaoyao Li             "Generator) is busy occupied by someone (via RDRAND/RDSEED) "
535f15898b0SXiaoyao Li             "maliciously, which leads to KVM_TDX_INIT_VM keeping failure "
536f15898b0SXiaoyao Li             "due to lack of entropy.\n");
537f15898b0SXiaoyao Li         }
538f15898b0SXiaoyao Li         error_propagate(errp, local_err);
539f15898b0SXiaoyao Li         return r;
540f15898b0SXiaoyao Li     }
541f15898b0SXiaoyao Li 
542f15898b0SXiaoyao Li     tdx_guest->initialized = true;
543f15898b0SXiaoyao Li 
544f15898b0SXiaoyao Li     return 0;
545f15898b0SXiaoyao Li }
546f15898b0SXiaoyao Li 
547cb5d65a8SXiaoyao Li int tdx_parse_tdvf(void *flash_ptr, int size)
548cb5d65a8SXiaoyao Li {
549cb5d65a8SXiaoyao Li     return tdvf_parse_metadata(&tdx_guest->tdvf, flash_ptr, size);
550cb5d65a8SXiaoyao Li }
551cb5d65a8SXiaoyao Li 
5526016e297SXiaoyao Li static bool tdx_guest_get_sept_ve_disable(Object *obj, Error **errp)
5536016e297SXiaoyao Li {
5546016e297SXiaoyao Li     TdxGuest *tdx = TDX_GUEST(obj);
5556016e297SXiaoyao Li 
5566016e297SXiaoyao Li     return !!(tdx->attributes & TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE);
5576016e297SXiaoyao Li }
5586016e297SXiaoyao Li 
5596016e297SXiaoyao Li static void tdx_guest_set_sept_ve_disable(Object *obj, bool value, Error **errp)
5606016e297SXiaoyao Li {
5616016e297SXiaoyao Li     TdxGuest *tdx = TDX_GUEST(obj);
5626016e297SXiaoyao Li 
5636016e297SXiaoyao Li     if (value) {
5646016e297SXiaoyao Li         tdx->attributes |= TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE;
5656016e297SXiaoyao Li     } else {
5666016e297SXiaoyao Li         tdx->attributes &= ~TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE;
5676016e297SXiaoyao Li     }
5686016e297SXiaoyao Li }
5696016e297SXiaoyao Li 
570d05a0858SIsaku Yamahata static char *tdx_guest_get_mrconfigid(Object *obj, Error **errp)
571d05a0858SIsaku Yamahata {
572d05a0858SIsaku Yamahata     TdxGuest *tdx = TDX_GUEST(obj);
573d05a0858SIsaku Yamahata 
574d05a0858SIsaku Yamahata     return g_strdup(tdx->mrconfigid);
575d05a0858SIsaku Yamahata }
576d05a0858SIsaku Yamahata 
577d05a0858SIsaku Yamahata static void tdx_guest_set_mrconfigid(Object *obj, const char *value, Error **errp)
578d05a0858SIsaku Yamahata {
579d05a0858SIsaku Yamahata     TdxGuest *tdx = TDX_GUEST(obj);
580d05a0858SIsaku Yamahata 
581d05a0858SIsaku Yamahata     g_free(tdx->mrconfigid);
582d05a0858SIsaku Yamahata     tdx->mrconfigid = g_strdup(value);
583d05a0858SIsaku Yamahata }
584d05a0858SIsaku Yamahata 
585d05a0858SIsaku Yamahata static char *tdx_guest_get_mrowner(Object *obj, Error **errp)
586d05a0858SIsaku Yamahata {
587d05a0858SIsaku Yamahata     TdxGuest *tdx = TDX_GUEST(obj);
588d05a0858SIsaku Yamahata 
589d05a0858SIsaku Yamahata     return g_strdup(tdx->mrowner);
590d05a0858SIsaku Yamahata }
591d05a0858SIsaku Yamahata 
592d05a0858SIsaku Yamahata static void tdx_guest_set_mrowner(Object *obj, const char *value, Error **errp)
593d05a0858SIsaku Yamahata {
594d05a0858SIsaku Yamahata     TdxGuest *tdx = TDX_GUEST(obj);
595d05a0858SIsaku Yamahata 
596d05a0858SIsaku Yamahata     g_free(tdx->mrowner);
597d05a0858SIsaku Yamahata     tdx->mrowner = g_strdup(value);
598d05a0858SIsaku Yamahata }
599d05a0858SIsaku Yamahata 
600d05a0858SIsaku Yamahata static char *tdx_guest_get_mrownerconfig(Object *obj, Error **errp)
601d05a0858SIsaku Yamahata {
602d05a0858SIsaku Yamahata     TdxGuest *tdx = TDX_GUEST(obj);
603d05a0858SIsaku Yamahata 
604d05a0858SIsaku Yamahata     return g_strdup(tdx->mrownerconfig);
605d05a0858SIsaku Yamahata }
606d05a0858SIsaku Yamahata 
607d05a0858SIsaku Yamahata static void tdx_guest_set_mrownerconfig(Object *obj, const char *value, Error **errp)
608d05a0858SIsaku Yamahata {
609d05a0858SIsaku Yamahata     TdxGuest *tdx = TDX_GUEST(obj);
610d05a0858SIsaku Yamahata 
611d05a0858SIsaku Yamahata     g_free(tdx->mrownerconfig);
612d05a0858SIsaku Yamahata     tdx->mrownerconfig = g_strdup(value);
613d05a0858SIsaku Yamahata }
614d05a0858SIsaku Yamahata 
615756e12e7SXiaoyao Li /* tdx guest */
616756e12e7SXiaoyao Li OBJECT_DEFINE_TYPE_WITH_INTERFACES(TdxGuest,
617756e12e7SXiaoyao Li                                    tdx_guest,
618756e12e7SXiaoyao Li                                    TDX_GUEST,
619756e12e7SXiaoyao Li                                    X86_CONFIDENTIAL_GUEST,
620756e12e7SXiaoyao Li                                    { TYPE_USER_CREATABLE },
621756e12e7SXiaoyao Li                                    { NULL })
622756e12e7SXiaoyao Li 
623756e12e7SXiaoyao Li static void tdx_guest_init(Object *obj)
624756e12e7SXiaoyao Li {
625756e12e7SXiaoyao Li     ConfidentialGuestSupport *cgs = CONFIDENTIAL_GUEST_SUPPORT(obj);
626756e12e7SXiaoyao Li     TdxGuest *tdx = TDX_GUEST(obj);
627756e12e7SXiaoyao Li 
628f15898b0SXiaoyao Li     qemu_mutex_init(&tdx->lock);
629f15898b0SXiaoyao Li 
630756e12e7SXiaoyao Li     cgs->require_guest_memfd = true;
631714af522SIsaku Yamahata     tdx->attributes = TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE;
632756e12e7SXiaoyao Li 
633756e12e7SXiaoyao Li     object_property_add_uint64_ptr(obj, "attributes", &tdx->attributes,
634756e12e7SXiaoyao Li                                    OBJ_PROP_FLAG_READWRITE);
6356016e297SXiaoyao Li     object_property_add_bool(obj, "sept-ve-disable",
6366016e297SXiaoyao Li                              tdx_guest_get_sept_ve_disable,
6376016e297SXiaoyao Li                              tdx_guest_set_sept_ve_disable);
638d05a0858SIsaku Yamahata     object_property_add_str(obj, "mrconfigid",
639d05a0858SIsaku Yamahata                             tdx_guest_get_mrconfigid,
640d05a0858SIsaku Yamahata                             tdx_guest_set_mrconfigid);
641d05a0858SIsaku Yamahata     object_property_add_str(obj, "mrowner",
642d05a0858SIsaku Yamahata                             tdx_guest_get_mrowner, tdx_guest_set_mrowner);
643d05a0858SIsaku Yamahata     object_property_add_str(obj, "mrownerconfig",
644d05a0858SIsaku Yamahata                             tdx_guest_get_mrownerconfig,
645d05a0858SIsaku Yamahata                             tdx_guest_set_mrownerconfig);
646756e12e7SXiaoyao Li }
647756e12e7SXiaoyao Li 
648756e12e7SXiaoyao Li static void tdx_guest_finalize(Object *obj)
649756e12e7SXiaoyao Li {
650756e12e7SXiaoyao Li }
651756e12e7SXiaoyao Li 
652756e12e7SXiaoyao Li static void tdx_guest_class_init(ObjectClass *oc, const void *data)
653756e12e7SXiaoyao Li {
654631a2ac5SXiaoyao Li     ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc);
655b455880eSXiaoyao Li     X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc);
656b455880eSXiaoyao Li 
657631a2ac5SXiaoyao Li     klass->kvm_init = tdx_kvm_init;
658b455880eSXiaoyao Li     x86_klass->kvm_type = tdx_kvm_type;
659756e12e7SXiaoyao Li }
660