xref: /qemu/target/i386/kvm/tdx.c (revision 75ec6189f5c65cab210dd9f16cf4eef368038d45)
1756e12e7SXiaoyao Li /*
2756e12e7SXiaoyao Li  * QEMU TDX support
3756e12e7SXiaoyao Li  *
4756e12e7SXiaoyao Li  * Copyright (c) 2025 Intel Corporation
5756e12e7SXiaoyao Li  *
6756e12e7SXiaoyao Li  * Author:
7756e12e7SXiaoyao Li  *      Xiaoyao Li <xiaoyao.li@intel.com>
8756e12e7SXiaoyao Li  *
9756e12e7SXiaoyao Li  * SPDX-License-Identifier: GPL-2.0-or-later
10756e12e7SXiaoyao Li  */
11756e12e7SXiaoyao Li 
12756e12e7SXiaoyao Li #include "qemu/osdep.h"
138eddedc3SXiaoyao Li #include "qemu/error-report.h"
14d05a0858SIsaku Yamahata #include "qemu/base64.h"
154420ba0eSXiaoyao Li #include "qemu/mmap-alloc.h"
168eddedc3SXiaoyao Li #include "qapi/error.h"
17756e12e7SXiaoyao Li #include "qom/object_interfaces.h"
18d05a0858SIsaku Yamahata #include "crypto/hash.h"
19bb45580dSXiaoyao Li #include "system/kvm_int.h"
206e250463SXiaoyao Li #include "system/runstate.h"
214420ba0eSXiaoyao Li #include "system/system.h"
22ebc2d2b4SIsaku Yamahata #include "system/ramblock.h"
23756e12e7SXiaoyao Li 
241ff5048dSXiaoyao Li #include <linux/kvm_para.h>
251ff5048dSXiaoyao Li 
26f18672e4SXiaoyao Li #include "hw/i386/e820_memory_layout.h"
274420ba0eSXiaoyao Li #include "hw/i386/tdvf.h"
28631a2ac5SXiaoyao Li #include "hw/i386/x86.h"
29a7314259SXiaoyao Li #include "hw/i386/tdvf-hob.h"
30b455880eSXiaoyao Li #include "kvm_i386.h"
31756e12e7SXiaoyao Li #include "tdx.h"
32756e12e7SXiaoyao Li 
/* Accepted TSC frequency range for a TD, in kHz (100 MHz .. 10 GHz) */
#define TDX_MIN_TSC_FREQUENCY_KHZ   (100 * 1000)
#define TDX_MAX_TSC_FREQUENCY_KHZ   (10 * 1000 * 1000)

/* Individual bits of the 64-bit TD attributes value */
#define TDX_TD_ATTRIBUTES_DEBUG             BIT_ULL(0)
#define TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE   BIT_ULL(28)
#define TDX_TD_ATTRIBUTES_PKS               BIT_ULL(30)
#define TDX_TD_ATTRIBUTES_PERFMON           BIT_ULL(63)

/*
 * Attribute bits QEMU accepts in tdx_validate_attributes().
 * TDX_TD_ATTRIBUTES_DEBUG is not part of this mask.
 */
#define TDX_SUPPORTED_TD_ATTRS  (TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE |\
                                 TDX_TD_ATTRIBUTES_PKS | \
                                 TDX_TD_ATTRIBUTES_PERFMON)
4453b6f406SXiaoyao Li 
/* The TDX guest object of this VM; NULL until tdx_kvm_init() succeeds */
static TdxGuest *tdx_guest;

/* Result of KVM_TDX_CAPABILITIES, allocated by get_tdx_capabilities() */
static struct kvm_tdx_capabilities *tdx_caps;
/* Copy of tdx_caps->cpuid, built once by tdx_setup_supported_cpuid() */
static struct kvm_cpuid2 *tdx_supported_cpuid;
498eddedc3SXiaoyao Li 
501619d0e4SXiaoyao Li /* Valid after kvm_arch_init()->confidential_guest_kvm_init()->tdx_kvm_init() */
511619d0e4SXiaoyao Li bool is_tdx_vm(void)
521619d0e4SXiaoyao Li {
531619d0e4SXiaoyao Li     return !!tdx_guest;
541619d0e4SXiaoyao Li }
551619d0e4SXiaoyao Li 
/* Selects which KVM file descriptor tdx_ioctl_internal() issues the ioctl on */
enum tdx_ioctl_level {
    TDX_VM_IOCTL,   /* use kvm_vm_ioctl() on kvm_state */
    TDX_VCPU_IOCTL, /* use kvm_vcpu_ioctl() on the given vCPU */
};
608eddedc3SXiaoyao Li 
618eddedc3SXiaoyao Li static int tdx_ioctl_internal(enum tdx_ioctl_level level, void *state,
628eddedc3SXiaoyao Li                               int cmd_id, __u32 flags, void *data,
638eddedc3SXiaoyao Li                               Error **errp)
64631a2ac5SXiaoyao Li {
658eddedc3SXiaoyao Li     struct kvm_tdx_cmd tdx_cmd = {};
668eddedc3SXiaoyao Li     int r;
678eddedc3SXiaoyao Li 
688eddedc3SXiaoyao Li     const char *tdx_ioctl_name[] = {
698eddedc3SXiaoyao Li         [KVM_TDX_CAPABILITIES] = "KVM_TDX_CAPABILITIES",
708eddedc3SXiaoyao Li         [KVM_TDX_INIT_VM] = "KVM_TDX_INIT_VM",
718eddedc3SXiaoyao Li         [KVM_TDX_INIT_VCPU] = "KVM_TDX_INIT_VCPU",
728eddedc3SXiaoyao Li         [KVM_TDX_INIT_MEM_REGION] = "KVM_TDX_INIT_MEM_REGION",
738eddedc3SXiaoyao Li         [KVM_TDX_FINALIZE_VM] = "KVM_TDX_FINALIZE_VM",
748eddedc3SXiaoyao Li         [KVM_TDX_GET_CPUID] = "KVM_TDX_GET_CPUID",
758eddedc3SXiaoyao Li     };
768eddedc3SXiaoyao Li 
778eddedc3SXiaoyao Li     tdx_cmd.id = cmd_id;
788eddedc3SXiaoyao Li     tdx_cmd.flags = flags;
798eddedc3SXiaoyao Li     tdx_cmd.data = (__u64)(unsigned long)data;
808eddedc3SXiaoyao Li 
818eddedc3SXiaoyao Li     switch (level) {
828eddedc3SXiaoyao Li     case TDX_VM_IOCTL:
838eddedc3SXiaoyao Li         r = kvm_vm_ioctl(kvm_state, KVM_MEMORY_ENCRYPT_OP, &tdx_cmd);
848eddedc3SXiaoyao Li         break;
858eddedc3SXiaoyao Li     case TDX_VCPU_IOCTL:
868eddedc3SXiaoyao Li         r = kvm_vcpu_ioctl(state, KVM_MEMORY_ENCRYPT_OP, &tdx_cmd);
878eddedc3SXiaoyao Li         break;
888eddedc3SXiaoyao Li     default:
898eddedc3SXiaoyao Li         error_setg(errp, "Invalid tdx_ioctl_level %d", level);
908eddedc3SXiaoyao Li         return -EINVAL;
918eddedc3SXiaoyao Li     }
928eddedc3SXiaoyao Li 
938eddedc3SXiaoyao Li     if (r < 0) {
948eddedc3SXiaoyao Li         error_setg_errno(errp, -r, "TDX ioctl %s failed, hw_errors: 0x%llx",
958eddedc3SXiaoyao Li                          tdx_ioctl_name[cmd_id], tdx_cmd.hw_error);
968eddedc3SXiaoyao Li     }
978eddedc3SXiaoyao Li     return r;
988eddedc3SXiaoyao Li }
998eddedc3SXiaoyao Li 
1008eddedc3SXiaoyao Li static inline int tdx_vm_ioctl(int cmd_id, __u32 flags, void *data,
1018eddedc3SXiaoyao Li                                Error **errp)
1028eddedc3SXiaoyao Li {
1038eddedc3SXiaoyao Li     return tdx_ioctl_internal(TDX_VM_IOCTL, NULL, cmd_id, flags, data, errp);
1048eddedc3SXiaoyao Li }
1058eddedc3SXiaoyao Li 
1068eddedc3SXiaoyao Li static inline int tdx_vcpu_ioctl(CPUState *cpu, int cmd_id, __u32 flags,
1078eddedc3SXiaoyao Li                                  void *data, Error **errp)
1088eddedc3SXiaoyao Li {
1098eddedc3SXiaoyao Li     return  tdx_ioctl_internal(TDX_VCPU_IOCTL, cpu, cmd_id, flags, data, errp);
1108eddedc3SXiaoyao Li }
1118eddedc3SXiaoyao Li 
1128eddedc3SXiaoyao Li static int get_tdx_capabilities(Error **errp)
1138eddedc3SXiaoyao Li {
1148eddedc3SXiaoyao Li     struct kvm_tdx_capabilities *caps;
1158eddedc3SXiaoyao Li     /* 1st generation of TDX reports 6 cpuid configs */
1168eddedc3SXiaoyao Li     int nr_cpuid_configs = 6;
1178eddedc3SXiaoyao Li     size_t size;
1188eddedc3SXiaoyao Li     int r;
1198eddedc3SXiaoyao Li 
1208eddedc3SXiaoyao Li     do {
1218eddedc3SXiaoyao Li         Error *local_err = NULL;
1228eddedc3SXiaoyao Li         size = sizeof(struct kvm_tdx_capabilities) +
1238eddedc3SXiaoyao Li                       nr_cpuid_configs * sizeof(struct kvm_cpuid_entry2);
1248eddedc3SXiaoyao Li         caps = g_malloc0(size);
1258eddedc3SXiaoyao Li         caps->cpuid.nent = nr_cpuid_configs;
1268eddedc3SXiaoyao Li 
1278eddedc3SXiaoyao Li         r = tdx_vm_ioctl(KVM_TDX_CAPABILITIES, 0, caps, &local_err);
1288eddedc3SXiaoyao Li         if (r == -E2BIG) {
1298eddedc3SXiaoyao Li             g_free(caps);
1308eddedc3SXiaoyao Li             nr_cpuid_configs *= 2;
1318eddedc3SXiaoyao Li             if (nr_cpuid_configs > KVM_MAX_CPUID_ENTRIES) {
1328eddedc3SXiaoyao Li                 error_report("KVM TDX seems broken that number of CPUID entries"
1338eddedc3SXiaoyao Li                              " in kvm_tdx_capabilities exceeds limit: %d",
1348eddedc3SXiaoyao Li                              KVM_MAX_CPUID_ENTRIES);
1358eddedc3SXiaoyao Li                 error_propagate(errp, local_err);
1368eddedc3SXiaoyao Li                 return r;
1378eddedc3SXiaoyao Li             }
1388eddedc3SXiaoyao Li             error_free(local_err);
1398eddedc3SXiaoyao Li         } else if (r < 0) {
1408eddedc3SXiaoyao Li             g_free(caps);
1418eddedc3SXiaoyao Li             error_propagate(errp, local_err);
1428eddedc3SXiaoyao Li             return r;
1438eddedc3SXiaoyao Li         }
1448eddedc3SXiaoyao Li     } while (r == -E2BIG);
1458eddedc3SXiaoyao Li 
1468eddedc3SXiaoyao Li     tdx_caps = caps;
147631a2ac5SXiaoyao Li 
148631a2ac5SXiaoyao Li     return 0;
149631a2ac5SXiaoyao Li }
150631a2ac5SXiaoyao Li 
/*
 * Remember @tdvf_mr as the memory region of the TDVF image; its RAM block
 * is discarded in tdx_finalize_vm().
 *
 * Must be called at most once: asserts that no region was recorded before.
 */
void tdx_set_tdvf_region(MemoryRegion *tdvf_mr)
{
    assert(!tdx_guest->tdvf_mr);
    tdx_guest->tdvf_mr = tdvf_mr;
}
1560dd5fe5eSChao Peng 
157a7314259SXiaoyao Li static TdxFirmwareEntry *tdx_get_hob_entry(TdxGuest *tdx)
158a7314259SXiaoyao Li {
159a7314259SXiaoyao Li     TdxFirmwareEntry *entry;
160a7314259SXiaoyao Li 
161a7314259SXiaoyao Li     for_each_tdx_fw_entry(&tdx->tdvf, entry) {
162a7314259SXiaoyao Li         if (entry->type == TDVF_SECTION_TYPE_TD_HOB) {
163a7314259SXiaoyao Li             return entry;
164a7314259SXiaoyao Li         }
165a7314259SXiaoyao Li     }
166a7314259SXiaoyao Li     error_report("TDVF metadata doesn't specify TD_HOB location.");
167a7314259SXiaoyao Li     exit(1);
168a7314259SXiaoyao Li }
169a7314259SXiaoyao Li 
170f18672e4SXiaoyao Li static void tdx_add_ram_entry(uint64_t address, uint64_t length,
171f18672e4SXiaoyao Li                               enum TdxRamType type)
172f18672e4SXiaoyao Li {
173f18672e4SXiaoyao Li     uint32_t nr_entries = tdx_guest->nr_ram_entries;
174f18672e4SXiaoyao Li     tdx_guest->ram_entries = g_renew(TdxRamEntry, tdx_guest->ram_entries,
175f18672e4SXiaoyao Li                                      nr_entries + 1);
176f18672e4SXiaoyao Li 
177f18672e4SXiaoyao Li     tdx_guest->ram_entries[nr_entries].address = address;
178f18672e4SXiaoyao Li     tdx_guest->ram_entries[nr_entries].length = length;
179f18672e4SXiaoyao Li     tdx_guest->ram_entries[nr_entries].type = type;
180f18672e4SXiaoyao Li     tdx_guest->nr_ram_entries++;
181f18672e4SXiaoyao Li }
182f18672e4SXiaoyao Li 
183f18672e4SXiaoyao Li static int tdx_accept_ram_range(uint64_t address, uint64_t length)
184f18672e4SXiaoyao Li {
185f18672e4SXiaoyao Li     uint64_t head_start, tail_start, head_length, tail_length;
186f18672e4SXiaoyao Li     uint64_t tmp_address, tmp_length;
187f18672e4SXiaoyao Li     TdxRamEntry *e;
188f18672e4SXiaoyao Li     int i = 0;
189f18672e4SXiaoyao Li 
190f18672e4SXiaoyao Li     do {
191f18672e4SXiaoyao Li         if (i == tdx_guest->nr_ram_entries) {
192f18672e4SXiaoyao Li             return -1;
193f18672e4SXiaoyao Li         }
194f18672e4SXiaoyao Li 
195f18672e4SXiaoyao Li         e = &tdx_guest->ram_entries[i++];
196f18672e4SXiaoyao Li     } while (address + length <= e->address || address >= e->address + e->length);
197f18672e4SXiaoyao Li 
198f18672e4SXiaoyao Li     /*
199f18672e4SXiaoyao Li      * The to-be-accepted ram range must be fully contained by one
200f18672e4SXiaoyao Li      * RAM entry.
201f18672e4SXiaoyao Li      */
202f18672e4SXiaoyao Li     if (e->address > address ||
203f18672e4SXiaoyao Li         e->address + e->length < address + length) {
204f18672e4SXiaoyao Li         return -1;
205f18672e4SXiaoyao Li     }
206f18672e4SXiaoyao Li 
207f18672e4SXiaoyao Li     if (e->type == TDX_RAM_ADDED) {
208f18672e4SXiaoyao Li         return 0;
209f18672e4SXiaoyao Li     }
210f18672e4SXiaoyao Li 
211f18672e4SXiaoyao Li     tmp_address = e->address;
212f18672e4SXiaoyao Li     tmp_length = e->length;
213f18672e4SXiaoyao Li 
214f18672e4SXiaoyao Li     e->address = address;
215f18672e4SXiaoyao Li     e->length = length;
216f18672e4SXiaoyao Li     e->type = TDX_RAM_ADDED;
217f18672e4SXiaoyao Li 
218f18672e4SXiaoyao Li     head_length = address - tmp_address;
219f18672e4SXiaoyao Li     if (head_length > 0) {
220f18672e4SXiaoyao Li         head_start = tmp_address;
221f18672e4SXiaoyao Li         tdx_add_ram_entry(head_start, head_length, TDX_RAM_UNACCEPTED);
222f18672e4SXiaoyao Li     }
223f18672e4SXiaoyao Li 
224f18672e4SXiaoyao Li     tail_start = address + length;
225f18672e4SXiaoyao Li     if (tail_start < tmp_address + tmp_length) {
226f18672e4SXiaoyao Li         tail_length = tmp_address + tmp_length - tail_start;
227f18672e4SXiaoyao Li         tdx_add_ram_entry(tail_start, tail_length, TDX_RAM_UNACCEPTED);
228f18672e4SXiaoyao Li     }
229f18672e4SXiaoyao Li 
230f18672e4SXiaoyao Li     return 0;
231f18672e4SXiaoyao Li }
232f18672e4SXiaoyao Li 
233f18672e4SXiaoyao Li static int tdx_ram_entry_compare(const void *lhs_, const void* rhs_)
234f18672e4SXiaoyao Li {
235f18672e4SXiaoyao Li     const TdxRamEntry *lhs = lhs_;
236f18672e4SXiaoyao Li     const TdxRamEntry *rhs = rhs_;
237f18672e4SXiaoyao Li 
238f18672e4SXiaoyao Li     if (lhs->address == rhs->address) {
239f18672e4SXiaoyao Li         return 0;
240f18672e4SXiaoyao Li     }
241f18672e4SXiaoyao Li     if (le64_to_cpu(lhs->address) > le64_to_cpu(rhs->address)) {
242f18672e4SXiaoyao Li         return 1;
243f18672e4SXiaoyao Li     }
244f18672e4SXiaoyao Li     return -1;
245f18672e4SXiaoyao Li }
246f18672e4SXiaoyao Li 
247f18672e4SXiaoyao Li static void tdx_init_ram_entries(void)
248f18672e4SXiaoyao Li {
249f18672e4SXiaoyao Li     unsigned i, j, nr_e820_entries;
250f18672e4SXiaoyao Li 
251f18672e4SXiaoyao Li     nr_e820_entries = e820_get_table(NULL);
252f18672e4SXiaoyao Li     tdx_guest->ram_entries = g_new(TdxRamEntry, nr_e820_entries);
253f18672e4SXiaoyao Li 
254f18672e4SXiaoyao Li     for (i = 0, j = 0; i < nr_e820_entries; i++) {
255f18672e4SXiaoyao Li         uint64_t addr, len;
256f18672e4SXiaoyao Li 
257f18672e4SXiaoyao Li         if (e820_get_entry(i, E820_RAM, &addr, &len)) {
258f18672e4SXiaoyao Li             tdx_guest->ram_entries[j].address = addr;
259f18672e4SXiaoyao Li             tdx_guest->ram_entries[j].length = len;
260f18672e4SXiaoyao Li             tdx_guest->ram_entries[j].type = TDX_RAM_UNACCEPTED;
261f18672e4SXiaoyao Li             j++;
262f18672e4SXiaoyao Li         }
263f18672e4SXiaoyao Li     }
264f18672e4SXiaoyao Li     tdx_guest->nr_ram_entries = j;
265f18672e4SXiaoyao Li }
266f18672e4SXiaoyao Li 
26741f7fd22SXiaoyao Li static void tdx_post_init_vcpus(void)
26841f7fd22SXiaoyao Li {
26941f7fd22SXiaoyao Li     TdxFirmwareEntry *hob;
27041f7fd22SXiaoyao Li     CPUState *cpu;
27141f7fd22SXiaoyao Li 
27241f7fd22SXiaoyao Li     hob = tdx_get_hob_entry(tdx_guest);
27341f7fd22SXiaoyao Li     CPU_FOREACH(cpu) {
27441f7fd22SXiaoyao Li         tdx_vcpu_ioctl(cpu, KVM_TDX_INIT_VCPU, 0, (void *)hob->address,
27541f7fd22SXiaoyao Li                        &error_fatal);
27641f7fd22SXiaoyao Li     }
27741f7fd22SXiaoyao Li }
27841f7fd22SXiaoyao Li 
2794420ba0eSXiaoyao Li static void tdx_finalize_vm(Notifier *notifier, void *unused)
2804420ba0eSXiaoyao Li {
2814420ba0eSXiaoyao Li     TdxFirmware *tdvf = &tdx_guest->tdvf;
2824420ba0eSXiaoyao Li     TdxFirmwareEntry *entry;
283ebc2d2b4SIsaku Yamahata     RAMBlock *ram_block;
284ebc2d2b4SIsaku Yamahata     Error *local_err = NULL;
285ebc2d2b4SIsaku Yamahata     int r;
2864420ba0eSXiaoyao Li 
287f18672e4SXiaoyao Li     tdx_init_ram_entries();
288f18672e4SXiaoyao Li 
2894420ba0eSXiaoyao Li     for_each_tdx_fw_entry(tdvf, entry) {
2904420ba0eSXiaoyao Li         switch (entry->type) {
2914420ba0eSXiaoyao Li         case TDVF_SECTION_TYPE_BFV:
2924420ba0eSXiaoyao Li         case TDVF_SECTION_TYPE_CFV:
2934420ba0eSXiaoyao Li             entry->mem_ptr = tdvf->mem_ptr + entry->data_offset;
2944420ba0eSXiaoyao Li             break;
2954420ba0eSXiaoyao Li         case TDVF_SECTION_TYPE_TD_HOB:
2964420ba0eSXiaoyao Li         case TDVF_SECTION_TYPE_TEMP_MEM:
2974420ba0eSXiaoyao Li             entry->mem_ptr = qemu_ram_mmap(-1, entry->size,
2984420ba0eSXiaoyao Li                                            qemu_real_host_page_size(), 0, 0);
2994420ba0eSXiaoyao Li             if (entry->mem_ptr == MAP_FAILED) {
3004420ba0eSXiaoyao Li                 error_report("Failed to mmap memory for TDVF section %d",
3014420ba0eSXiaoyao Li                              entry->type);
3024420ba0eSXiaoyao Li                 exit(1);
3034420ba0eSXiaoyao Li             }
304f18672e4SXiaoyao Li             if (tdx_accept_ram_range(entry->address, entry->size)) {
305f18672e4SXiaoyao Li                 error_report("Failed to accept memory for TDVF section %d",
306f18672e4SXiaoyao Li                              entry->type);
307f18672e4SXiaoyao Li                 qemu_ram_munmap(-1, entry->mem_ptr, entry->size);
308f18672e4SXiaoyao Li                 exit(1);
309f18672e4SXiaoyao Li             }
3104420ba0eSXiaoyao Li             break;
3114420ba0eSXiaoyao Li         default:
3124420ba0eSXiaoyao Li             error_report("Unsupported TDVF section %d", entry->type);
3134420ba0eSXiaoyao Li             exit(1);
3144420ba0eSXiaoyao Li         }
3154420ba0eSXiaoyao Li     }
316f18672e4SXiaoyao Li 
317f18672e4SXiaoyao Li     qsort(tdx_guest->ram_entries, tdx_guest->nr_ram_entries,
318f18672e4SXiaoyao Li           sizeof(TdxRamEntry), &tdx_ram_entry_compare);
319a7314259SXiaoyao Li 
320a7314259SXiaoyao Li     tdvf_hob_create(tdx_guest, tdx_get_hob_entry(tdx_guest));
321ebc2d2b4SIsaku Yamahata 
32241f7fd22SXiaoyao Li     tdx_post_init_vcpus();
32341f7fd22SXiaoyao Li 
324ebc2d2b4SIsaku Yamahata     for_each_tdx_fw_entry(tdvf, entry) {
325ebc2d2b4SIsaku Yamahata         struct kvm_tdx_init_mem_region region;
326ebc2d2b4SIsaku Yamahata         uint32_t flags;
327ebc2d2b4SIsaku Yamahata 
328ebc2d2b4SIsaku Yamahata         region = (struct kvm_tdx_init_mem_region) {
329ebc2d2b4SIsaku Yamahata             .source_addr = (uint64_t)entry->mem_ptr,
330ebc2d2b4SIsaku Yamahata             .gpa = entry->address,
331ebc2d2b4SIsaku Yamahata             .nr_pages = entry->size >> 12,
332ebc2d2b4SIsaku Yamahata         };
333ebc2d2b4SIsaku Yamahata 
334ebc2d2b4SIsaku Yamahata         flags = entry->attributes & TDVF_SECTION_ATTRIBUTES_MR_EXTEND ?
335ebc2d2b4SIsaku Yamahata                 KVM_TDX_MEASURE_MEMORY_REGION : 0;
336ebc2d2b4SIsaku Yamahata 
337ebc2d2b4SIsaku Yamahata         do {
338ebc2d2b4SIsaku Yamahata             error_free(local_err);
339ebc2d2b4SIsaku Yamahata             local_err = NULL;
340ebc2d2b4SIsaku Yamahata             r = tdx_vcpu_ioctl(first_cpu, KVM_TDX_INIT_MEM_REGION, flags,
341ebc2d2b4SIsaku Yamahata                                &region, &local_err);
342ebc2d2b4SIsaku Yamahata         } while (r == -EAGAIN || r == -EINTR);
343ebc2d2b4SIsaku Yamahata         if (r < 0) {
344ebc2d2b4SIsaku Yamahata             error_report_err(local_err);
345ebc2d2b4SIsaku Yamahata             exit(1);
346ebc2d2b4SIsaku Yamahata         }
347ebc2d2b4SIsaku Yamahata 
348ebc2d2b4SIsaku Yamahata         if (entry->type == TDVF_SECTION_TYPE_TD_HOB ||
349ebc2d2b4SIsaku Yamahata             entry->type == TDVF_SECTION_TYPE_TEMP_MEM) {
350ebc2d2b4SIsaku Yamahata             qemu_ram_munmap(-1, entry->mem_ptr, entry->size);
351ebc2d2b4SIsaku Yamahata             entry->mem_ptr = NULL;
352ebc2d2b4SIsaku Yamahata         }
353ebc2d2b4SIsaku Yamahata     }
354ebc2d2b4SIsaku Yamahata 
355ebc2d2b4SIsaku Yamahata     /*
356ebc2d2b4SIsaku Yamahata      * TDVF image has been copied into private region above via
357ebc2d2b4SIsaku Yamahata      * KVM_MEMORY_MAPPING. It becomes useless.
358ebc2d2b4SIsaku Yamahata      */
359ebc2d2b4SIsaku Yamahata     ram_block = tdx_guest->tdvf_mr->ram_block;
360ebc2d2b4SIsaku Yamahata     ram_block_discard_range(ram_block, 0, ram_block->max_length);
361ae60ff4eSXiaoyao Li 
362ae60ff4eSXiaoyao Li     tdx_vm_ioctl(KVM_TDX_FINALIZE_VM, 0, NULL, &error_fatal);
363ae60ff4eSXiaoyao Li     CONFIDENTIAL_GUEST_SUPPORT(tdx_guest)->ready = true;
3644420ba0eSXiaoyao Li }
3654420ba0eSXiaoyao Li 
/* Registered by tdx_kvm_init() as a machine-init-done notifier */
static Notifier tdx_machine_done_notify = {
    .notify = tdx_finalize_vm,
};
3694420ba0eSXiaoyao Li 
370*75ec6189SXiaoyao Li static void tdx_setup_supported_cpuid(void)
371*75ec6189SXiaoyao Li {
372*75ec6189SXiaoyao Li     if (tdx_supported_cpuid) {
373*75ec6189SXiaoyao Li         return;
374*75ec6189SXiaoyao Li     }
375*75ec6189SXiaoyao Li 
376*75ec6189SXiaoyao Li     tdx_supported_cpuid = g_malloc0(sizeof(*tdx_supported_cpuid) +
377*75ec6189SXiaoyao Li                     KVM_MAX_CPUID_ENTRIES * sizeof(struct kvm_cpuid_entry2));
378*75ec6189SXiaoyao Li 
379*75ec6189SXiaoyao Li     memcpy(tdx_supported_cpuid->entries, tdx_caps->cpuid.entries,
380*75ec6189SXiaoyao Li            tdx_caps->cpuid.nent * sizeof(struct kvm_cpuid_entry2));
381*75ec6189SXiaoyao Li     tdx_supported_cpuid->nent = tdx_caps->cpuid.nent;
382*75ec6189SXiaoyao Li }
383*75ec6189SXiaoyao Li 
3848eddedc3SXiaoyao Li static int tdx_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
3858eddedc3SXiaoyao Li {
386810d4e83SXiaoyao Li     MachineState *ms = MACHINE(qdev_get_machine());
387810d4e83SXiaoyao Li     X86MachineState *x86ms = X86_MACHINE(ms);
3881619d0e4SXiaoyao Li     TdxGuest *tdx = TDX_GUEST(cgs);
3898eddedc3SXiaoyao Li     int r = 0;
3908eddedc3SXiaoyao Li 
3918eddedc3SXiaoyao Li     kvm_mark_guest_state_protected();
3928eddedc3SXiaoyao Li 
393810d4e83SXiaoyao Li     if (x86ms->smm == ON_OFF_AUTO_AUTO) {
394810d4e83SXiaoyao Li         x86ms->smm = ON_OFF_AUTO_OFF;
395810d4e83SXiaoyao Li     } else if (x86ms->smm == ON_OFF_AUTO_ON) {
396810d4e83SXiaoyao Li         error_setg(errp, "TDX VM doesn't support SMM");
397810d4e83SXiaoyao Li         return -EINVAL;
398810d4e83SXiaoyao Li     }
399810d4e83SXiaoyao Li 
400e7ef6089SXiaoyao Li     if (x86ms->pic == ON_OFF_AUTO_AUTO) {
401e7ef6089SXiaoyao Li         x86ms->pic = ON_OFF_AUTO_OFF;
402e7ef6089SXiaoyao Li     } else if (x86ms->pic == ON_OFF_AUTO_ON) {
403e7ef6089SXiaoyao Li         error_setg(errp, "TDX VM doesn't support PIC");
404e7ef6089SXiaoyao Li         return -EINVAL;
405e7ef6089SXiaoyao Li     }
406e7ef6089SXiaoyao Li 
407bb45580dSXiaoyao Li     if (kvm_state->kernel_irqchip_split == ON_OFF_AUTO_AUTO) {
408bb45580dSXiaoyao Li         kvm_state->kernel_irqchip_split = ON_OFF_AUTO_ON;
409bb45580dSXiaoyao Li     } else if (kvm_state->kernel_irqchip_split != ON_OFF_AUTO_ON) {
410bb45580dSXiaoyao Li         error_setg(errp, "TDX VM requires kernel_irqchip to be split");
411bb45580dSXiaoyao Li         return -EINVAL;
412bb45580dSXiaoyao Li     }
413bb45580dSXiaoyao Li 
4148eddedc3SXiaoyao Li     if (!tdx_caps) {
4158eddedc3SXiaoyao Li         r = get_tdx_capabilities(errp);
4161619d0e4SXiaoyao Li         if (r) {
4171619d0e4SXiaoyao Li             return r;
4181619d0e4SXiaoyao Li         }
4198eddedc3SXiaoyao Li     }
4208eddedc3SXiaoyao Li 
421*75ec6189SXiaoyao Li     tdx_setup_supported_cpuid();
422*75ec6189SXiaoyao Li 
4231ff5048dSXiaoyao Li     /* TDX relies on KVM_HC_MAP_GPA_RANGE to handle TDG.VP.VMCALL<MapGPA> */
4241ff5048dSXiaoyao Li     if (!kvm_enable_hypercall(BIT_ULL(KVM_HC_MAP_GPA_RANGE))) {
4251ff5048dSXiaoyao Li         return -EOPNOTSUPP;
4261ff5048dSXiaoyao Li     }
4271ff5048dSXiaoyao Li 
428da672865SXiaoyao Li     /*
429da672865SXiaoyao Li      * Set kvm_readonly_mem_allowed to false, because TDX only supports readonly
430da672865SXiaoyao Li      * memory for shared memory but not for private memory. Besides, whether a
431da672865SXiaoyao Li      * memslot is private or shared is not determined by QEMU.
432da672865SXiaoyao Li      *
433da672865SXiaoyao Li      * Thus, just mark readonly memory not supported for simplicity.
434da672865SXiaoyao Li      */
435da672865SXiaoyao Li     kvm_readonly_mem_allowed = false;
436da672865SXiaoyao Li 
4374420ba0eSXiaoyao Li     qemu_add_machine_init_done_notifier(&tdx_machine_done_notify);
4384420ba0eSXiaoyao Li 
4391619d0e4SXiaoyao Li     tdx_guest = tdx;
4401619d0e4SXiaoyao Li     return 0;
4418eddedc3SXiaoyao Li }
4428eddedc3SXiaoyao Li 
443b455880eSXiaoyao Li static int tdx_kvm_type(X86ConfidentialGuest *cg)
444b455880eSXiaoyao Li {
445b455880eSXiaoyao Li     /* Do the object check */
446b455880eSXiaoyao Li     TDX_GUEST(cg);
447b455880eSXiaoyao Li 
448b455880eSXiaoyao Li     return KVM_X86_TDX_VM;
449b455880eSXiaoyao Li }
450b455880eSXiaoyao Li 
4517c615242SXiaoyao Li static void tdx_cpu_instance_init(X86ConfidentialGuest *cg, CPUState *cpu)
4527c615242SXiaoyao Li {
4539002494fSXiaoyao Li     X86CPU *x86cpu = X86_CPU(cpu);
4549002494fSXiaoyao Li 
4557c615242SXiaoyao Li     object_property_set_bool(OBJECT(cpu), "pmu", false, &error_abort);
4569002494fSXiaoyao Li 
4579002494fSXiaoyao Li     x86cpu->enable_cpuid_0x1f = true;
4587c615242SXiaoyao Li }
4597c615242SXiaoyao Li 
460*75ec6189SXiaoyao Li static uint32_t tdx_adjust_cpuid_features(X86ConfidentialGuest *cg,
461*75ec6189SXiaoyao Li                                           uint32_t feature, uint32_t index,
462*75ec6189SXiaoyao Li                                           int reg, uint32_t value)
463*75ec6189SXiaoyao Li {
464*75ec6189SXiaoyao Li     struct kvm_cpuid_entry2 *e;
465*75ec6189SXiaoyao Li 
466*75ec6189SXiaoyao Li     if (is_feature_word_cpuid(feature, index, reg)) {
467*75ec6189SXiaoyao Li         e = cpuid_find_entry(tdx_supported_cpuid, feature, index);
468*75ec6189SXiaoyao Li         if (e) {
469*75ec6189SXiaoyao Li             value &= cpuid_entry_get_reg(e, reg);
470*75ec6189SXiaoyao Li         }
471*75ec6189SXiaoyao Li     }
472*75ec6189SXiaoyao Li 
473*75ec6189SXiaoyao Li     return value;
474*75ec6189SXiaoyao Li }
475*75ec6189SXiaoyao Li 
47653b6f406SXiaoyao Li static int tdx_validate_attributes(TdxGuest *tdx, Error **errp)
47753b6f406SXiaoyao Li {
47853b6f406SXiaoyao Li     if ((tdx->attributes & ~tdx_caps->supported_attrs)) {
47953b6f406SXiaoyao Li         error_setg(errp, "Invalid attributes 0x%lx for TDX VM "
48053b6f406SXiaoyao Li                    "(KVM supported: 0x%llx)", tdx->attributes,
48153b6f406SXiaoyao Li                    tdx_caps->supported_attrs);
48253b6f406SXiaoyao Li         return -1;
48353b6f406SXiaoyao Li     }
48453b6f406SXiaoyao Li 
48553b6f406SXiaoyao Li     if (tdx->attributes & ~TDX_SUPPORTED_TD_ATTRS) {
48653b6f406SXiaoyao Li         error_setg(errp, "Some QEMU unsupported TD attribute bits being "
48753b6f406SXiaoyao Li                     "requested: 0x%lx (QEMU supported: 0x%llx)",
48853b6f406SXiaoyao Li                     tdx->attributes, TDX_SUPPORTED_TD_ATTRS);
48953b6f406SXiaoyao Li         return -1;
49053b6f406SXiaoyao Li     }
49153b6f406SXiaoyao Li 
49253b6f406SXiaoyao Li     return 0;
49353b6f406SXiaoyao Li }
49453b6f406SXiaoyao Li 
49553b6f406SXiaoyao Li static int setup_td_guest_attributes(X86CPU *x86cpu, Error **errp)
496bb3be394SXiaoyao Li {
497bb3be394SXiaoyao Li     CPUX86State *env = &x86cpu->env;
498bb3be394SXiaoyao Li 
499bb3be394SXiaoyao Li     tdx_guest->attributes |= (env->features[FEAT_7_0_ECX] & CPUID_7_0_ECX_PKS) ?
500bb3be394SXiaoyao Li                              TDX_TD_ATTRIBUTES_PKS : 0;
501bb3be394SXiaoyao Li     tdx_guest->attributes |= x86cpu->enable_pmu ? TDX_TD_ATTRIBUTES_PERFMON : 0;
50253b6f406SXiaoyao Li 
50353b6f406SXiaoyao Li     return tdx_validate_attributes(tdx_guest, errp);
504bb3be394SXiaoyao Li }
505bb3be394SXiaoyao Li 
506f15898b0SXiaoyao Li static int setup_td_xfam(X86CPU *x86cpu, Error **errp)
507f15898b0SXiaoyao Li {
508f15898b0SXiaoyao Li     CPUX86State *env = &x86cpu->env;
509f15898b0SXiaoyao Li     uint64_t xfam;
510f15898b0SXiaoyao Li 
511f15898b0SXiaoyao Li     xfam = env->features[FEAT_XSAVE_XCR0_LO] |
512f15898b0SXiaoyao Li            env->features[FEAT_XSAVE_XCR0_HI] |
513f15898b0SXiaoyao Li            env->features[FEAT_XSAVE_XSS_LO] |
514f15898b0SXiaoyao Li            env->features[FEAT_XSAVE_XSS_HI];
515f15898b0SXiaoyao Li 
516f15898b0SXiaoyao Li     if (xfam & ~tdx_caps->supported_xfam) {
517f15898b0SXiaoyao Li         error_setg(errp, "Invalid XFAM 0x%lx for TDX VM (supported: 0x%llx))",
518f15898b0SXiaoyao Li                    xfam, tdx_caps->supported_xfam);
519f15898b0SXiaoyao Li         return -1;
520f15898b0SXiaoyao Li     }
521f15898b0SXiaoyao Li 
522f15898b0SXiaoyao Li     tdx_guest->xfam = xfam;
523f15898b0SXiaoyao Li     return 0;
524f15898b0SXiaoyao Li }
525f15898b0SXiaoyao Li 
526f15898b0SXiaoyao Li static void tdx_filter_cpuid(struct kvm_cpuid2 *cpuids)
527f15898b0SXiaoyao Li {
528f15898b0SXiaoyao Li     int i, dest_cnt = 0;
529f15898b0SXiaoyao Li     struct kvm_cpuid_entry2 *src, *dest, *conf;
530f15898b0SXiaoyao Li 
531f15898b0SXiaoyao Li     for (i = 0; i < cpuids->nent; i++) {
532f15898b0SXiaoyao Li         src = cpuids->entries + i;
533f15898b0SXiaoyao Li         conf = cpuid_find_entry(&tdx_caps->cpuid, src->function, src->index);
534f15898b0SXiaoyao Li         if (!conf) {
535f15898b0SXiaoyao Li             continue;
536f15898b0SXiaoyao Li         }
537f15898b0SXiaoyao Li         dest = cpuids->entries + dest_cnt;
538f15898b0SXiaoyao Li 
539f15898b0SXiaoyao Li         dest->function = src->function;
540f15898b0SXiaoyao Li         dest->index = src->index;
541f15898b0SXiaoyao Li         dest->flags = src->flags;
542f15898b0SXiaoyao Li         dest->eax = src->eax & conf->eax;
543f15898b0SXiaoyao Li         dest->ebx = src->ebx & conf->ebx;
544f15898b0SXiaoyao Li         dest->ecx = src->ecx & conf->ecx;
545f15898b0SXiaoyao Li         dest->edx = src->edx & conf->edx;
546f15898b0SXiaoyao Li 
547f15898b0SXiaoyao Li         dest_cnt++;
548f15898b0SXiaoyao Li     }
549f15898b0SXiaoyao Li     cpuids->nent = dest_cnt++;
550f15898b0SXiaoyao Li }
551f15898b0SXiaoyao Li 
552f15898b0SXiaoyao Li int tdx_pre_create_vcpu(CPUState *cpu, Error **errp)
553f15898b0SXiaoyao Li {
554f15898b0SXiaoyao Li     X86CPU *x86cpu = X86_CPU(cpu);
555f15898b0SXiaoyao Li     CPUX86State *env = &x86cpu->env;
556f15898b0SXiaoyao Li     g_autofree struct kvm_tdx_init_vm *init_vm = NULL;
557f15898b0SXiaoyao Li     Error *local_err = NULL;
558d05a0858SIsaku Yamahata     size_t data_len;
559f15898b0SXiaoyao Li     int retry = 10000;
560f15898b0SXiaoyao Li     int r = 0;
561f15898b0SXiaoyao Li 
562f15898b0SXiaoyao Li     QEMU_LOCK_GUARD(&tdx_guest->lock);
563f15898b0SXiaoyao Li     if (tdx_guest->initialized) {
564f15898b0SXiaoyao Li         return r;
565f15898b0SXiaoyao Li     }
566f15898b0SXiaoyao Li 
567f15898b0SXiaoyao Li     init_vm = g_malloc0(sizeof(struct kvm_tdx_init_vm) +
568f15898b0SXiaoyao Li                         sizeof(struct kvm_cpuid_entry2) * KVM_MAX_CPUID_ENTRIES);
569f15898b0SXiaoyao Li 
570d529a2acSXiaoyao Li     if (!kvm_check_extension(kvm_state, KVM_CAP_X86_APIC_BUS_CYCLES_NS)) {
571d529a2acSXiaoyao Li         error_setg(errp, "KVM doesn't support KVM_CAP_X86_APIC_BUS_CYCLES_NS");
572d529a2acSXiaoyao Li         return -EOPNOTSUPP;
573d529a2acSXiaoyao Li     }
574d529a2acSXiaoyao Li 
575d529a2acSXiaoyao Li     r = kvm_vm_enable_cap(kvm_state, KVM_CAP_X86_APIC_BUS_CYCLES_NS,
576d529a2acSXiaoyao Li                           0, TDX_APIC_BUS_CYCLES_NS);
577d529a2acSXiaoyao Li     if (r < 0) {
578d529a2acSXiaoyao Li         error_setg_errno(errp, -r,
579d529a2acSXiaoyao Li                          "Unable to set core crystal clock frequency to 25MHz");
580d529a2acSXiaoyao Li         return r;
581d529a2acSXiaoyao Li     }
582d529a2acSXiaoyao Li 
5830e73b843SXiaoyao Li     if (env->tsc_khz && (env->tsc_khz < TDX_MIN_TSC_FREQUENCY_KHZ ||
5840e73b843SXiaoyao Li                          env->tsc_khz > TDX_MAX_TSC_FREQUENCY_KHZ)) {
5850e73b843SXiaoyao Li         error_setg(errp, "Invalid TSC %ld KHz, must specify cpu_frequency "
5860e73b843SXiaoyao Li                          "between [%d, %d] kHz", env->tsc_khz,
5870e73b843SXiaoyao Li                          TDX_MIN_TSC_FREQUENCY_KHZ, TDX_MAX_TSC_FREQUENCY_KHZ);
5880e73b843SXiaoyao Li        return -EINVAL;
5890e73b843SXiaoyao Li     }
5900e73b843SXiaoyao Li 
5910e73b843SXiaoyao Li     if (env->tsc_khz % (25 * 1000)) {
5920e73b843SXiaoyao Li         error_setg(errp, "Invalid TSC %ld KHz, it must be multiple of 25MHz",
5930e73b843SXiaoyao Li                    env->tsc_khz);
5940e73b843SXiaoyao Li         return -EINVAL;
5950e73b843SXiaoyao Li     }
5960e73b843SXiaoyao Li 
5970e73b843SXiaoyao Li     /* it's safe even env->tsc_khz is 0. KVM uses host's tsc_khz in this case */
5980e73b843SXiaoyao Li     r = kvm_vm_ioctl(kvm_state, KVM_SET_TSC_KHZ, env->tsc_khz);
5990e73b843SXiaoyao Li     if (r < 0) {
6000e73b843SXiaoyao Li         error_setg_errno(errp, -r, "Unable to set TSC frequency to %ld kHz",
6010e73b843SXiaoyao Li                          env->tsc_khz);
6020e73b843SXiaoyao Li         return r;
6030e73b843SXiaoyao Li     }
6040e73b843SXiaoyao Li 
605d05a0858SIsaku Yamahata     if (tdx_guest->mrconfigid) {
606d05a0858SIsaku Yamahata         g_autofree uint8_t *data = qbase64_decode(tdx_guest->mrconfigid,
607d05a0858SIsaku Yamahata                               strlen(tdx_guest->mrconfigid), &data_len, errp);
608d05a0858SIsaku Yamahata         if (!data) {
609d05a0858SIsaku Yamahata             return -1;
610d05a0858SIsaku Yamahata         }
611d05a0858SIsaku Yamahata         if (data_len != QCRYPTO_HASH_DIGEST_LEN_SHA384) {
612d05a0858SIsaku Yamahata             error_setg(errp, "TDX: failed to decode mrconfigid");
613d05a0858SIsaku Yamahata             return -1;
614d05a0858SIsaku Yamahata         }
615d05a0858SIsaku Yamahata         memcpy(init_vm->mrconfigid, data, data_len);
616d05a0858SIsaku Yamahata     }
617d05a0858SIsaku Yamahata 
618d05a0858SIsaku Yamahata     if (tdx_guest->mrowner) {
619d05a0858SIsaku Yamahata         g_autofree uint8_t *data = qbase64_decode(tdx_guest->mrowner,
620d05a0858SIsaku Yamahata                               strlen(tdx_guest->mrowner), &data_len, errp);
621d05a0858SIsaku Yamahata         if (!data) {
622d05a0858SIsaku Yamahata             return -1;
623d05a0858SIsaku Yamahata         }
624d05a0858SIsaku Yamahata         if (data_len != QCRYPTO_HASH_DIGEST_LEN_SHA384) {
625d05a0858SIsaku Yamahata             error_setg(errp, "TDX: failed to decode mrowner");
626d05a0858SIsaku Yamahata             return -1;
627d05a0858SIsaku Yamahata         }
628d05a0858SIsaku Yamahata         memcpy(init_vm->mrowner, data, data_len);
629d05a0858SIsaku Yamahata     }
630d05a0858SIsaku Yamahata 
631d05a0858SIsaku Yamahata     if (tdx_guest->mrownerconfig) {
632d05a0858SIsaku Yamahata         g_autofree uint8_t *data = qbase64_decode(tdx_guest->mrownerconfig,
633d05a0858SIsaku Yamahata                             strlen(tdx_guest->mrownerconfig), &data_len, errp);
634d05a0858SIsaku Yamahata         if (!data) {
635d05a0858SIsaku Yamahata             return -1;
636d05a0858SIsaku Yamahata         }
637d05a0858SIsaku Yamahata         if (data_len != QCRYPTO_HASH_DIGEST_LEN_SHA384) {
638d05a0858SIsaku Yamahata             error_setg(errp, "TDX: failed to decode mrownerconfig");
639d05a0858SIsaku Yamahata             return -1;
640d05a0858SIsaku Yamahata         }
641d05a0858SIsaku Yamahata         memcpy(init_vm->mrownerconfig, data, data_len);
642d05a0858SIsaku Yamahata     }
643d05a0858SIsaku Yamahata 
64453b6f406SXiaoyao Li     r = setup_td_guest_attributes(x86cpu, errp);
64553b6f406SXiaoyao Li     if (r) {
64653b6f406SXiaoyao Li         return r;
64753b6f406SXiaoyao Li     }
648bb3be394SXiaoyao Li 
649f15898b0SXiaoyao Li     r = setup_td_xfam(x86cpu, errp);
650f15898b0SXiaoyao Li     if (r) {
651f15898b0SXiaoyao Li         return r;
652f15898b0SXiaoyao Li     }
653f15898b0SXiaoyao Li 
654f15898b0SXiaoyao Li     init_vm->cpuid.nent = kvm_x86_build_cpuid(env, init_vm->cpuid.entries, 0);
655f15898b0SXiaoyao Li     tdx_filter_cpuid(&init_vm->cpuid);
656f15898b0SXiaoyao Li 
657f15898b0SXiaoyao Li     init_vm->attributes = tdx_guest->attributes;
658f15898b0SXiaoyao Li     init_vm->xfam = tdx_guest->xfam;
659f15898b0SXiaoyao Li 
660f15898b0SXiaoyao Li     /*
661f15898b0SXiaoyao Li      * KVM_TDX_INIT_VM gets -EAGAIN when KVM side SEAMCALL(TDH_MNG_CREATE)
662f15898b0SXiaoyao Li      * gets TDX_RND_NO_ENTROPY due to Random number generation (e.g., RDRAND or
663f15898b0SXiaoyao Li      * RDSEED) is busy.
664f15898b0SXiaoyao Li      *
665f15898b0SXiaoyao Li      * Retry for the case.
666f15898b0SXiaoyao Li      */
667f15898b0SXiaoyao Li     do {
668f15898b0SXiaoyao Li         error_free(local_err);
669f15898b0SXiaoyao Li         local_err = NULL;
670f15898b0SXiaoyao Li         r = tdx_vm_ioctl(KVM_TDX_INIT_VM, 0, init_vm, &local_err);
671f15898b0SXiaoyao Li     } while (r == -EAGAIN && --retry);
672f15898b0SXiaoyao Li 
673f15898b0SXiaoyao Li     if (r < 0) {
674f15898b0SXiaoyao Li         if (!retry) {
675f15898b0SXiaoyao Li             error_append_hint(&local_err, "Hardware RNG (Random Number "
676f15898b0SXiaoyao Li             "Generator) is busy occupied by someone (via RDRAND/RDSEED) "
677f15898b0SXiaoyao Li             "maliciously, which leads to KVM_TDX_INIT_VM keeping failure "
678f15898b0SXiaoyao Li             "due to lack of entropy.\n");
679f15898b0SXiaoyao Li         }
680f15898b0SXiaoyao Li         error_propagate(errp, local_err);
681f15898b0SXiaoyao Li         return r;
682f15898b0SXiaoyao Li     }
683f15898b0SXiaoyao Li 
684f15898b0SXiaoyao Li     tdx_guest->initialized = true;
685f15898b0SXiaoyao Li 
686f15898b0SXiaoyao Li     return 0;
687f15898b0SXiaoyao Li }
688f15898b0SXiaoyao Li 
689cb5d65a8SXiaoyao Li int tdx_parse_tdvf(void *flash_ptr, int size)
690cb5d65a8SXiaoyao Li {
691cb5d65a8SXiaoyao Li     return tdvf_parse_metadata(&tdx_guest->tdvf, flash_ptr, size);
692cb5d65a8SXiaoyao Li }
693cb5d65a8SXiaoyao Li 
6946e250463SXiaoyao Li static void tdx_panicked_on_fatal_error(X86CPU *cpu, uint64_t error_code,
6956e250463SXiaoyao Li                                         char *message, uint64_t gpa)
6966e250463SXiaoyao Li {
6976e250463SXiaoyao Li     GuestPanicInformation *panic_info;
6986e250463SXiaoyao Li 
6996e250463SXiaoyao Li     panic_info = g_new0(GuestPanicInformation, 1);
7006e250463SXiaoyao Li     panic_info->type = GUEST_PANIC_INFORMATION_TYPE_TDX;
7016e250463SXiaoyao Li     panic_info->u.tdx.error_code = (uint32_t) error_code;
7026e250463SXiaoyao Li     panic_info->u.tdx.message = message;
7036e250463SXiaoyao Li     panic_info->u.tdx.gpa = gpa;
7046e250463SXiaoyao Li 
7056e250463SXiaoyao Li     qemu_system_guest_panicked(panic_info);
7066e250463SXiaoyao Li }
7076e250463SXiaoyao Li 
70898dbfd68SXiaoyao Li /*
70998dbfd68SXiaoyao Li  * Only 8 registers can contain valid ASCII byte stream to form the fatal
71098dbfd68SXiaoyao Li  * message, and their sequence is: R14, R15, RBX, RDI, RSI, R8, R9, RDX
71198dbfd68SXiaoyao Li  */
71298dbfd68SXiaoyao Li #define TDX_FATAL_MESSAGE_MAX        64
71398dbfd68SXiaoyao Li 
7146e250463SXiaoyao Li #define TDX_REPORT_FATAL_ERROR_GPA_VALID    BIT_ULL(63)
7156e250463SXiaoyao Li 
71698dbfd68SXiaoyao Li int tdx_handle_report_fatal_error(X86CPU *cpu, struct kvm_run *run)
71798dbfd68SXiaoyao Li {
71898dbfd68SXiaoyao Li     uint64_t error_code = run->system_event.data[R_R12];
71998dbfd68SXiaoyao Li     uint64_t reg_mask = run->system_event.data[R_ECX];
72098dbfd68SXiaoyao Li     char *message = NULL;
72198dbfd68SXiaoyao Li     uint64_t *tmp;
7226e250463SXiaoyao Li     uint64_t gpa = -1ull;
72398dbfd68SXiaoyao Li 
72498dbfd68SXiaoyao Li     if (error_code & 0xffff) {
72598dbfd68SXiaoyao Li         error_report("TDX: REPORT_FATAL_ERROR: invalid error code: 0x%lx",
72698dbfd68SXiaoyao Li                      error_code);
72798dbfd68SXiaoyao Li         return -1;
72898dbfd68SXiaoyao Li     }
72998dbfd68SXiaoyao Li 
73098dbfd68SXiaoyao Li     if (reg_mask) {
73198dbfd68SXiaoyao Li         message = g_malloc0(TDX_FATAL_MESSAGE_MAX + 1);
73298dbfd68SXiaoyao Li         tmp = (uint64_t *)message;
73398dbfd68SXiaoyao Li 
73498dbfd68SXiaoyao Li #define COPY_REG(REG)                               \
73598dbfd68SXiaoyao Li     do {                                            \
73698dbfd68SXiaoyao Li         if (reg_mask & BIT_ULL(REG)) {              \
73798dbfd68SXiaoyao Li             *(tmp++) = run->system_event.data[REG]; \
73898dbfd68SXiaoyao Li         }                                           \
73998dbfd68SXiaoyao Li     } while (0)
74098dbfd68SXiaoyao Li 
74198dbfd68SXiaoyao Li         COPY_REG(R_R14);
74298dbfd68SXiaoyao Li         COPY_REG(R_R15);
74398dbfd68SXiaoyao Li         COPY_REG(R_EBX);
74498dbfd68SXiaoyao Li         COPY_REG(R_EDI);
74598dbfd68SXiaoyao Li         COPY_REG(R_ESI);
74698dbfd68SXiaoyao Li         COPY_REG(R_R8);
74798dbfd68SXiaoyao Li         COPY_REG(R_R9);
74898dbfd68SXiaoyao Li         COPY_REG(R_EDX);
74998dbfd68SXiaoyao Li         *((char *)tmp) = '\0';
75098dbfd68SXiaoyao Li     }
75198dbfd68SXiaoyao Li #undef COPY_REG
75298dbfd68SXiaoyao Li 
7536e250463SXiaoyao Li     if (error_code & TDX_REPORT_FATAL_ERROR_GPA_VALID) {
7546e250463SXiaoyao Li         gpa = run->system_event.data[R_R13];
7556e250463SXiaoyao Li     }
7566e250463SXiaoyao Li 
7576e250463SXiaoyao Li     tdx_panicked_on_fatal_error(cpu, error_code, message, gpa);
7586e250463SXiaoyao Li 
75998dbfd68SXiaoyao Li     return -1;
76098dbfd68SXiaoyao Li }
76198dbfd68SXiaoyao Li 
7626016e297SXiaoyao Li static bool tdx_guest_get_sept_ve_disable(Object *obj, Error **errp)
7636016e297SXiaoyao Li {
7646016e297SXiaoyao Li     TdxGuest *tdx = TDX_GUEST(obj);
7656016e297SXiaoyao Li 
7666016e297SXiaoyao Li     return !!(tdx->attributes & TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE);
7676016e297SXiaoyao Li }
7686016e297SXiaoyao Li 
7696016e297SXiaoyao Li static void tdx_guest_set_sept_ve_disable(Object *obj, bool value, Error **errp)
7706016e297SXiaoyao Li {
7716016e297SXiaoyao Li     TdxGuest *tdx = TDX_GUEST(obj);
7726016e297SXiaoyao Li 
7736016e297SXiaoyao Li     if (value) {
7746016e297SXiaoyao Li         tdx->attributes |= TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE;
7756016e297SXiaoyao Li     } else {
7766016e297SXiaoyao Li         tdx->attributes &= ~TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE;
7776016e297SXiaoyao Li     }
7786016e297SXiaoyao Li }
7796016e297SXiaoyao Li 
780d05a0858SIsaku Yamahata static char *tdx_guest_get_mrconfigid(Object *obj, Error **errp)
781d05a0858SIsaku Yamahata {
782d05a0858SIsaku Yamahata     TdxGuest *tdx = TDX_GUEST(obj);
783d05a0858SIsaku Yamahata 
784d05a0858SIsaku Yamahata     return g_strdup(tdx->mrconfigid);
785d05a0858SIsaku Yamahata }
786d05a0858SIsaku Yamahata 
787d05a0858SIsaku Yamahata static void tdx_guest_set_mrconfigid(Object *obj, const char *value, Error **errp)
788d05a0858SIsaku Yamahata {
789d05a0858SIsaku Yamahata     TdxGuest *tdx = TDX_GUEST(obj);
790d05a0858SIsaku Yamahata 
791d05a0858SIsaku Yamahata     g_free(tdx->mrconfigid);
792d05a0858SIsaku Yamahata     tdx->mrconfigid = g_strdup(value);
793d05a0858SIsaku Yamahata }
794d05a0858SIsaku Yamahata 
795d05a0858SIsaku Yamahata static char *tdx_guest_get_mrowner(Object *obj, Error **errp)
796d05a0858SIsaku Yamahata {
797d05a0858SIsaku Yamahata     TdxGuest *tdx = TDX_GUEST(obj);
798d05a0858SIsaku Yamahata 
799d05a0858SIsaku Yamahata     return g_strdup(tdx->mrowner);
800d05a0858SIsaku Yamahata }
801d05a0858SIsaku Yamahata 
802d05a0858SIsaku Yamahata static void tdx_guest_set_mrowner(Object *obj, const char *value, Error **errp)
803d05a0858SIsaku Yamahata {
804d05a0858SIsaku Yamahata     TdxGuest *tdx = TDX_GUEST(obj);
805d05a0858SIsaku Yamahata 
806d05a0858SIsaku Yamahata     g_free(tdx->mrowner);
807d05a0858SIsaku Yamahata     tdx->mrowner = g_strdup(value);
808d05a0858SIsaku Yamahata }
809d05a0858SIsaku Yamahata 
810d05a0858SIsaku Yamahata static char *tdx_guest_get_mrownerconfig(Object *obj, Error **errp)
811d05a0858SIsaku Yamahata {
812d05a0858SIsaku Yamahata     TdxGuest *tdx = TDX_GUEST(obj);
813d05a0858SIsaku Yamahata 
814d05a0858SIsaku Yamahata     return g_strdup(tdx->mrownerconfig);
815d05a0858SIsaku Yamahata }
816d05a0858SIsaku Yamahata 
817d05a0858SIsaku Yamahata static void tdx_guest_set_mrownerconfig(Object *obj, const char *value, Error **errp)
818d05a0858SIsaku Yamahata {
819d05a0858SIsaku Yamahata     TdxGuest *tdx = TDX_GUEST(obj);
820d05a0858SIsaku Yamahata 
821d05a0858SIsaku Yamahata     g_free(tdx->mrownerconfig);
822d05a0858SIsaku Yamahata     tdx->mrownerconfig = g_strdup(value);
823d05a0858SIsaku Yamahata }
824d05a0858SIsaku Yamahata 
825756e12e7SXiaoyao Li /* tdx guest */
826756e12e7SXiaoyao Li OBJECT_DEFINE_TYPE_WITH_INTERFACES(TdxGuest,
827756e12e7SXiaoyao Li                                    tdx_guest,
828756e12e7SXiaoyao Li                                    TDX_GUEST,
829756e12e7SXiaoyao Li                                    X86_CONFIDENTIAL_GUEST,
830756e12e7SXiaoyao Li                                    { TYPE_USER_CREATABLE },
831756e12e7SXiaoyao Li                                    { NULL })
832756e12e7SXiaoyao Li 
833756e12e7SXiaoyao Li static void tdx_guest_init(Object *obj)
834756e12e7SXiaoyao Li {
835756e12e7SXiaoyao Li     ConfidentialGuestSupport *cgs = CONFIDENTIAL_GUEST_SUPPORT(obj);
836756e12e7SXiaoyao Li     TdxGuest *tdx = TDX_GUEST(obj);
837756e12e7SXiaoyao Li 
838f15898b0SXiaoyao Li     qemu_mutex_init(&tdx->lock);
839f15898b0SXiaoyao Li 
840756e12e7SXiaoyao Li     cgs->require_guest_memfd = true;
841714af522SIsaku Yamahata     tdx->attributes = TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE;
842756e12e7SXiaoyao Li 
843756e12e7SXiaoyao Li     object_property_add_uint64_ptr(obj, "attributes", &tdx->attributes,
844756e12e7SXiaoyao Li                                    OBJ_PROP_FLAG_READWRITE);
8456016e297SXiaoyao Li     object_property_add_bool(obj, "sept-ve-disable",
8466016e297SXiaoyao Li                              tdx_guest_get_sept_ve_disable,
8476016e297SXiaoyao Li                              tdx_guest_set_sept_ve_disable);
848d05a0858SIsaku Yamahata     object_property_add_str(obj, "mrconfigid",
849d05a0858SIsaku Yamahata                             tdx_guest_get_mrconfigid,
850d05a0858SIsaku Yamahata                             tdx_guest_set_mrconfigid);
851d05a0858SIsaku Yamahata     object_property_add_str(obj, "mrowner",
852d05a0858SIsaku Yamahata                             tdx_guest_get_mrowner, tdx_guest_set_mrowner);
853d05a0858SIsaku Yamahata     object_property_add_str(obj, "mrownerconfig",
854d05a0858SIsaku Yamahata                             tdx_guest_get_mrownerconfig,
855d05a0858SIsaku Yamahata                             tdx_guest_set_mrownerconfig);
856756e12e7SXiaoyao Li }
857756e12e7SXiaoyao Li 
858756e12e7SXiaoyao Li static void tdx_guest_finalize(Object *obj)
859756e12e7SXiaoyao Li {
860756e12e7SXiaoyao Li }
861756e12e7SXiaoyao Li 
862756e12e7SXiaoyao Li static void tdx_guest_class_init(ObjectClass *oc, const void *data)
863756e12e7SXiaoyao Li {
864631a2ac5SXiaoyao Li     ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc);
865b455880eSXiaoyao Li     X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc);
866b455880eSXiaoyao Li 
867631a2ac5SXiaoyao Li     klass->kvm_init = tdx_kvm_init;
868b455880eSXiaoyao Li     x86_klass->kvm_type = tdx_kvm_type;
8697c615242SXiaoyao Li     x86_klass->cpu_instance_init = tdx_cpu_instance_init;
870*75ec6189SXiaoyao Li     x86_klass->adjust_cpuid_features = tdx_adjust_cpuid_features;
871756e12e7SXiaoyao Li }
872