/*
 * QEMU TDX support
 *
 * Copyright (c) 2025 Intel Corporation
 *
 * Author:
 *      Xiaoyao Li <xiaoyao.li@intel.com>
 *
 * SPDX-License-Identifier: GPL-2.0-or-later
 */

#include "qemu/osdep.h"
#include "qemu/error-report.h"
#include "qemu/base64.h"
#include "qemu/mmap-alloc.h"
#include "qapi/error.h"
#include "qom/object_interfaces.h"
#include "crypto/hash.h"
#include "system/kvm_int.h"
#include "system/runstate.h"
#include "system/system.h"
#include "system/ramblock.h"

#include <linux/kvm_para.h>

#include "cpu.h"
#include "cpu-internal.h"
#include "host-cpu.h"
#include "hw/i386/e820_memory_layout.h"
#include "hw/i386/tdvf.h"
#include "hw/i386/x86.h"
#include "hw/i386/tdvf-hob.h"
#include "kvm_i386.h"
#include "tdx.h"

#include "standard-headers/asm-x86/kvm_para.h"

#define TDX_MIN_TSC_FREQUENCY_KHZ   (100 * 1000)
#define TDX_MAX_TSC_FREQUENCY_KHZ   (10 * 1000 * 1000)

#define TDX_TD_ATTRIBUTES_DEBUG             BIT_ULL(0)
#define TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE   BIT_ULL(28)
#define TDX_TD_ATTRIBUTES_PKS               BIT_ULL(30)
#define TDX_TD_ATTRIBUTES_PERFMON           BIT_ULL(63)

#define TDX_SUPPORTED_TD_ATTRS  (TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE | \
                                 TDX_TD_ATTRIBUTES_PKS | \
                                 TDX_TD_ATTRIBUTES_PERFMON)

#define TDX_SUPPORTED_KVM_FEATURES  ((1U << KVM_FEATURE_NOP_IO_DELAY) | \
                                     (1U << KVM_FEATURE_PV_UNHALT) | \
                                     (1U << KVM_FEATURE_PV_TLB_FLUSH) | \
                                     (1U << KVM_FEATURE_PV_SEND_IPI) | \
                                     (1U << KVM_FEATURE_POLL_CONTROL) | \
                                     (1U << KVM_FEATURE_PV_SCHED_YIELD) | \
                                     (1U << KVM_FEATURE_MSI_EXT_DEST_ID))

static TdxGuest *tdx_guest;

static struct kvm_tdx_capabilities *tdx_caps;
static struct kvm_cpuid2 *tdx_supported_cpuid;

/* Valid after kvm_arch_init()->confidential_guest_kvm_init()->tdx_kvm_init() */
bool is_tdx_vm(void)
{
    return !!tdx_guest;
}

enum tdx_ioctl_level {
    TDX_VM_IOCTL,
    TDX_VCPU_IOCTL,
};

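/*
 * Issue a KVM_MEMORY_ENCRYPT_OP ioctl at VM or vCPU scope.  On failure,
 * @errp is set with the command name and the hardware error code that the
 * TDX module returned in kvm_tdx_cmd.hw_error.
 */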
static int tdx_ioctl_internal(enum tdx_ioctl_level level, void *state,
                              int cmd_id, __u32 flags, void *data,
                              Error **errp)
{
    struct kvm_tdx_cmd tdx_cmd = {};
    int r;

    const char *tdx_ioctl_name[] = {
        [KVM_TDX_CAPABILITIES] = "KVM_TDX_CAPABILITIES",
        [KVM_TDX_INIT_VM] = "KVM_TDX_INIT_VM",
        [KVM_TDX_INIT_VCPU] = "KVM_TDX_INIT_VCPU",
        [KVM_TDX_INIT_MEM_REGION] = "KVM_TDX_INIT_MEM_REGION",
        [KVM_TDX_FINALIZE_VM] = "KVM_TDX_FINALIZE_VM",
        [KVM_TDX_GET_CPUID] = "KVM_TDX_GET_CPUID",
    };

    tdx_cmd.id = cmd_id;
    tdx_cmd.flags = flags;
    tdx_cmd.data = (__u64)(unsigned long)data;

    switch (level) {
    case TDX_VM_IOCTL:
        r = kvm_vm_ioctl(kvm_state, KVM_MEMORY_ENCRYPT_OP, &tdx_cmd);
        break;
    case TDX_VCPU_IOCTL:
        r = kvm_vcpu_ioctl(state, KVM_MEMORY_ENCRYPT_OP, &tdx_cmd);
        break;
    default:
        error_setg(errp, "Invalid tdx_ioctl_level %d", level);
        return -EINVAL;
    }

    if (r < 0) {
        error_setg_errno(errp, -r, "TDX ioctl %s failed, hw_errors: 0x%llx",
                         tdx_ioctl_name[cmd_id], tdx_cmd.hw_error);
    }
    return r;
}

static inline int tdx_vm_ioctl(int cmd_id, __u32 flags, void *data,
                               Error **errp)
{
    return tdx_ioctl_internal(TDX_VM_IOCTL, NULL, cmd_id, flags, data, errp);
}

static inline int tdx_vcpu_ioctl(CPUState *cpu, int cmd_id, __u32 flags,
                                 void *data, Error **errp)
{
    return tdx_ioctl_internal(TDX_VCPU_IOCTL, cpu, cmd_id, flags, data, errp);
}

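/*
 * Query KVM for the TDX capabilities, doubling the CPUID buffer on -E2BIG
 * until it is large enough.  The result is cached in the global tdx_caps.
 */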
static int get_tdx_capabilities(Error **errp)
{
    struct kvm_tdx_capabilities *caps;
    /* 1st generation of TDX reports 6 cpuid configs */
    int nr_cpuid_configs = 6;
    size_t size;
    int r;

    do {
        Error *local_err = NULL;
        size = sizeof(struct kvm_tdx_capabilities) +
                      nr_cpuid_configs * sizeof(struct kvm_cpuid_entry2);
        caps = g_malloc0(size);
        caps->cpuid.nent = nr_cpuid_configs;

        r = tdx_vm_ioctl(KVM_TDX_CAPABILITIES, 0, caps, &local_err);
        if (r == -E2BIG) {
            g_free(caps);
            nr_cpuid_configs *= 2;
            if (nr_cpuid_configs > KVM_MAX_CPUID_ENTRIES) {
                error_report("KVM TDX seems broken: the number of CPUID "
                             "entries in kvm_tdx_capabilities exceeds the "
                             "limit of %d", KVM_MAX_CPUID_ENTRIES);
                error_propagate(errp, local_err);
                return r;
            }
            error_free(local_err);
        } else if (r < 0) {
            g_free(caps);
            error_propagate(errp, local_err);
            return r;
        }
    } while (r == -E2BIG);

    tdx_caps = caps;

    return 0;
}

void tdx_set_tdvf_region(MemoryRegion *tdvf_mr)
{
    assert(!tdx_guest->tdvf_mr);
    tdx_guest->tdvf_mr = tdvf_mr;
}

static TdxFirmwareEntry *tdx_get_hob_entry(TdxGuest *tdx)
{
    TdxFirmwareEntry *entry;

    for_each_tdx_fw_entry(&tdx->tdvf, entry) {
        if (entry->type == TDVF_SECTION_TYPE_TD_HOB) {
            return entry;
        }
    }
    error_report("TDVF metadata doesn't specify TD_HOB location.");
    exit(1);
}

static void tdx_add_ram_entry(uint64_t address, uint64_t length,
                              enum TdxRamType type)
{
    uint32_t nr_entries = tdx_guest->nr_ram_entries;

    tdx_guest->ram_entries = g_renew(TdxRamEntry, tdx_guest->ram_entries,
                                     nr_entries + 1);

    tdx_guest->ram_entries[nr_entries].address = address;
    tdx_guest->ram_entries[nr_entries].length = length;
    tdx_guest->ram_entries[nr_entries].type = type;
    tdx_guest->nr_ram_entries++;
}

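/*
 * Mark [address, address + length) as accepted (TDX_RAM_ADDED).  The range
 * must be fully contained in a single RAM entry; the entry is split and any
 * head/tail remainder is re-added as TDX_RAM_UNACCEPTED.
 */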
static int tdx_accept_ram_range(uint64_t address, uint64_t length)
{
    uint64_t head_start, tail_start, head_length, tail_length;
    uint64_t tmp_address, tmp_length;
    TdxRamEntry *e;
    int i = 0;

    do {
        if (i == tdx_guest->nr_ram_entries) {
            return -1;
        }

        e = &tdx_guest->ram_entries[i++];
    } while (address + length <= e->address || address >= e->address + e->length);

    /*
     * The to-be-accepted ram range must be fully contained by one
     * RAM entry.
     */
    if (e->address > address ||
        e->address + e->length < address + length) {
        return -1;
    }

    if (e->type == TDX_RAM_ADDED) {
        return 0;
    }

    tmp_address = e->address;
    tmp_length = e->length;

    e->address = address;
    e->length = length;
    e->type = TDX_RAM_ADDED;

    head_length = address - tmp_address;
    if (head_length > 0) {
        head_start = tmp_address;
        tdx_add_ram_entry(head_start, head_length, TDX_RAM_UNACCEPTED);
    }

    tail_start = address + length;
    if (tail_start < tmp_address + tmp_length) {
        tail_length = tmp_address + tmp_length - tail_start;
        tdx_add_ram_entry(tail_start, tail_length, TDX_RAM_UNACCEPTED);
    }

    return 0;
}

static int tdx_ram_entry_compare(const void *lhs_, const void *rhs_)
{
    const TdxRamEntry *lhs = lhs_;
    const TdxRamEntry *rhs = rhs_;

    if (lhs->address == rhs->address) {
        return 0;
    }
    if (le64_to_cpu(lhs->address) > le64_to_cpu(rhs->address)) {
        return 1;
    }
    return -1;
}

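/* Seed the RAM entry list from the E820 table, all initially unaccepted. */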
static void tdx_init_ram_entries(void)
{
    unsigned i, j, nr_e820_entries;

    nr_e820_entries = e820_get_table(NULL);
    tdx_guest->ram_entries = g_new(TdxRamEntry, nr_e820_entries);

    for (i = 0, j = 0; i < nr_e820_entries; i++) {
        uint64_t addr, len;

        if (e820_get_entry(i, E820_RAM, &addr, &len)) {
            tdx_guest->ram_entries[j].address = addr;
            tdx_guest->ram_entries[j].length = len;
            tdx_guest->ram_entries[j].type = TDX_RAM_UNACCEPTED;
            j++;
        }
    }
    tdx_guest->nr_ram_entries = j;
}

static void tdx_post_init_vcpus(void)
{
    TdxFirmwareEntry *hob;
    CPUState *cpu;

    hob = tdx_get_hob_entry(tdx_guest);
    CPU_FOREACH(cpu) {
        tdx_vcpu_ioctl(cpu, KVM_TDX_INIT_VCPU, 0, (void *)(uintptr_t)hob->address,
                       &error_fatal);
    }
}

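/*
 * Machine-init-done notifier: build the TD HOB, initialize the vCPUs, copy
 * (and optionally measure) the TDVF sections into private memory via
 * KVM_TDX_INIT_MEM_REGION, then finalize the TD measurement with
 * KVM_TDX_FINALIZE_VM.
 */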
static void tdx_finalize_vm(Notifier *notifier, void *unused)
{
    TdxFirmware *tdvf = &tdx_guest->tdvf;
    TdxFirmwareEntry *entry;
    RAMBlock *ram_block;
    Error *local_err = NULL;
    int r;

    tdx_init_ram_entries();

    for_each_tdx_fw_entry(tdvf, entry) {
        switch (entry->type) {
        case TDVF_SECTION_TYPE_BFV:
        case TDVF_SECTION_TYPE_CFV:
            entry->mem_ptr = tdvf->mem_ptr + entry->data_offset;
            break;
        case TDVF_SECTION_TYPE_TD_HOB:
        case TDVF_SECTION_TYPE_TEMP_MEM:
            entry->mem_ptr = qemu_ram_mmap(-1, entry->size,
                                           qemu_real_host_page_size(), 0, 0);
            if (entry->mem_ptr == MAP_FAILED) {
                error_report("Failed to mmap memory for TDVF section %d",
                             entry->type);
                exit(1);
            }
            if (tdx_accept_ram_range(entry->address, entry->size)) {
                error_report("Failed to accept memory for TDVF section %d",
                             entry->type);
                qemu_ram_munmap(-1, entry->mem_ptr, entry->size);
                exit(1);
            }
            break;
        default:
            error_report("Unsupported TDVF section %d", entry->type);
            exit(1);
        }
    }

    qsort(tdx_guest->ram_entries, tdx_guest->nr_ram_entries,
          sizeof(TdxRamEntry), &tdx_ram_entry_compare);

    tdvf_hob_create(tdx_guest, tdx_get_hob_entry(tdx_guest));

    tdx_post_init_vcpus();

    for_each_tdx_fw_entry(tdvf, entry) {
        struct kvm_tdx_init_mem_region region;
        uint32_t flags;

        region = (struct kvm_tdx_init_mem_region) {
            .source_addr = (uintptr_t)entry->mem_ptr,
            .gpa = entry->address,
            .nr_pages = entry->size >> 12,
        };

        flags = entry->attributes & TDVF_SECTION_ATTRIBUTES_MR_EXTEND ?
                KVM_TDX_MEASURE_MEMORY_REGION : 0;

        do {
            error_free(local_err);
            local_err = NULL;
            r = tdx_vcpu_ioctl(first_cpu, KVM_TDX_INIT_MEM_REGION, flags,
                               &region, &local_err);
        } while (r == -EAGAIN || r == -EINTR);
        if (r < 0) {
            error_report_err(local_err);
            exit(1);
        }

        if (entry->type == TDVF_SECTION_TYPE_TD_HOB ||
            entry->type == TDVF_SECTION_TYPE_TEMP_MEM) {
            qemu_ram_munmap(-1, entry->mem_ptr, entry->size);
            entry->mem_ptr = NULL;
        }
    }

    /*
     * The TDVF image has been copied into the private region above via
     * KVM_MEMORY_MAPPING, so the original shared copy is no longer needed.
     */
    ram_block = tdx_guest->tdvf_mr->ram_block;
    ram_block_discard_range(ram_block, 0, ram_block->max_length);

    tdx_vm_ioctl(KVM_TDX_FINALIZE_VM, 0, NULL, &error_fatal);
    CONFIDENTIAL_GUEST_SUPPORT(tdx_guest)->ready = true;
}

static Notifier tdx_machine_done_notify = {
    .notify = tdx_finalize_vm,
};

/*
 * Some CPUID bits change from fixed1 to configurable when the TDX module
 * supports TDX_FEATURES0.VE_REDUCTION, e.g., MCA/MCE/MTRR/CORE_CAPABILITY.
 *
 * To make QEMU work with all versions of the TDX module, keep a bit in
 * tdx_fixed1_bits if it was ever a fixed1 bit in any version, even if it is
 * not fixed1 in the latest one.  Otherwise, with an older TDX module, QEMU
 * might treat such a fixed1 bit as unsupported.
 *
 * For a newer TDX module it does no harm to keep these bits in
 * tdx_fixed1_bits even though they became configurable, because
 * tdx_fixed1_bits is only used to set up the supported bits.
 */
KvmCpuidInfo tdx_fixed1_bits = {
    .cpuid.nent = 8,
    .entries[0] = {
        .function = 0x1,
        .index = 0,
        .ecx = CPUID_EXT_SSE3 | CPUID_EXT_PCLMULQDQ | CPUID_EXT_DTES64 |
               CPUID_EXT_DSCPL | CPUID_EXT_SSSE3 | CPUID_EXT_CX16 |
               CPUID_EXT_PDCM | CPUID_EXT_PCID | CPUID_EXT_SSE41 |
               CPUID_EXT_SSE42 | CPUID_EXT_X2APIC | CPUID_EXT_MOVBE |
               CPUID_EXT_POPCNT | CPUID_EXT_AES | CPUID_EXT_XSAVE |
               CPUID_EXT_RDRAND | CPUID_EXT_HYPERVISOR,
        .edx = CPUID_FP87 | CPUID_VME | CPUID_DE | CPUID_PSE | CPUID_TSC |
               CPUID_MSR | CPUID_PAE | CPUID_MCE | CPUID_CX8 | CPUID_APIC |
               CPUID_SEP | CPUID_MTRR | CPUID_PGE | CPUID_MCA | CPUID_CMOV |
               CPUID_PAT | CPUID_CLFLUSH | CPUID_DTS | CPUID_MMX | CPUID_FXSR |
               CPUID_SSE | CPUID_SSE2,
    },
    .entries[1] = {
        .function = 0x6,
        .index = 0,
        .eax = CPUID_6_EAX_ARAT,
    },
    .entries[2] = {
        .function = 0x7,
        .index = 0,
        .flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX,
        .ebx = CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_FDP_EXCPTN_ONLY |
               CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_INVPCID |
               CPUID_7_0_EBX_ZERO_FCS_FDS | CPUID_7_0_EBX_RDSEED |
               CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_CLFLUSHOPT |
               CPUID_7_0_EBX_CLWB | CPUID_7_0_EBX_SHA_NI,
        .ecx = CPUID_7_0_ECX_BUS_LOCK_DETECT | CPUID_7_0_ECX_MOVDIRI |
               CPUID_7_0_ECX_MOVDIR64B,
        .edx = CPUID_7_0_EDX_MD_CLEAR | CPUID_7_0_EDX_SPEC_CTRL |
               CPUID_7_0_EDX_STIBP | CPUID_7_0_EDX_FLUSH_L1D |
               CPUID_7_0_EDX_ARCH_CAPABILITIES | CPUID_7_0_EDX_CORE_CAPABILITY |
               CPUID_7_0_EDX_SPEC_CTRL_SSBD,
    },
    .entries[3] = {
        .function = 0x7,
        .index = 2,
        .flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX,
        .edx = CPUID_7_2_EDX_PSFD | CPUID_7_2_EDX_IPRED_CTRL |
               CPUID_7_2_EDX_RRSBA_CTRL | CPUID_7_2_EDX_BHI_CTRL,
    },
    .entries[4] = {
        .function = 0xD,
        .index = 0,
        .flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX,
        .eax = XSTATE_FP_MASK | XSTATE_SSE_MASK,
    },
    .entries[5] = {
        .function = 0xD,
        .index = 1,
        .flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX,
        .eax = CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC |
               CPUID_XSAVE_XGETBV1 | CPUID_XSAVE_XSAVES,
    },
    .entries[6] = {
        .function = 0x80000001,
        .index = 0,
        .ecx = CPUID_EXT3_LAHF_LM | CPUID_EXT3_ABM | CPUID_EXT3_3DNOWPREFETCH,
        /*
         * Strictly speaking, SYSCALL is not a fixed1 bit since it depends on
         * the CPU being in 64-bit mode. But here fixed1 is used to serve the
         * purpose of enumerating the supported bits for TDX. In this sense,
         * SYSCALL is always supported.
         */
        .edx = CPUID_EXT2_SYSCALL | CPUID_EXT2_NX | CPUID_EXT2_PDPE1GB |
               CPUID_EXT2_RDTSCP | CPUID_EXT2_LM,
    },
    .entries[7] = {
        .function = 0x80000007,
        .index = 0,
        .edx = CPUID_APM_INVTSC,
    },
};

typedef struct TdxAttrsMap {
    uint32_t attr_index;
    uint32_t cpuid_leaf;
    uint32_t cpuid_subleaf;
    int cpuid_reg;
    uint32_t feat_mask;
} TdxAttrsMap;

static TdxAttrsMap tdx_attrs_maps[] = {
    {.attr_index = 27,
     .cpuid_leaf = 7,
     .cpuid_subleaf = 1,
     .cpuid_reg = R_EAX,
     .feat_mask = CPUID_7_1_EAX_LASS,},

    {.attr_index = 30,
     .cpuid_leaf = 7,
     .cpuid_subleaf = 0,
     .cpuid_reg = R_ECX,
     .feat_mask = CPUID_7_0_ECX_PKS,},

    {.attr_index = 31,
     .cpuid_leaf = 7,
     .cpuid_subleaf = 0,
     .cpuid_reg = R_ECX,
     .feat_mask = CPUID_7_0_ECX_KeyLocker,},
};

typedef struct TdxXFAMDep {
    int xfam_bit;
    FeatureMask feat_mask;
} TdxXFAMDep;

/*
 * Note, only the CPUID bits whose virtualization type is "XFAM & Native"
 * are defined here.
 *
 * Those whose virtualization type is "XFAM & Configured & Native" are
 * reported as configurable bits, and they are not supported unless they
 * appear in KVM's configurable-bits list, even if the corresponding XFAM
 * bit is supported.
 */
TdxXFAMDep tdx_xfam_deps[] = {
    { XSTATE_YMM_BIT,       { FEAT_1_ECX, CPUID_EXT_FMA }},
    { XSTATE_YMM_BIT,       { FEAT_7_0_EBX, CPUID_7_0_EBX_AVX2 }},
    { XSTATE_OPMASK_BIT,    { FEAT_7_0_ECX, CPUID_7_0_ECX_AVX512_VBMI }},
    { XSTATE_OPMASK_BIT,    { FEAT_7_0_EDX, CPUID_7_0_EDX_AVX512_FP16 }},
    { XSTATE_PT_BIT,        { FEAT_7_0_EBX, CPUID_7_0_EBX_INTEL_PT }},
    { XSTATE_PKRU_BIT,      { FEAT_7_0_ECX, CPUID_7_0_ECX_PKU }},
    { XSTATE_XTILE_CFG_BIT, { FEAT_7_0_EDX, CPUID_7_0_EDX_AMX_BF16 }},
    { XSTATE_XTILE_CFG_BIT, { FEAT_7_0_EDX, CPUID_7_0_EDX_AMX_TILE }},
    { XSTATE_XTILE_CFG_BIT, { FEAT_7_0_EDX, CPUID_7_0_EDX_AMX_INT8 }},
};

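/*
 * Look up the (function, index) leaf in tdx_supported_cpuid, appending a
 * zero-initialized entry if it is not present yet.
 */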
static struct kvm_cpuid_entry2 *find_in_supported_entry(uint32_t function,
                                                        uint32_t index)
{
    struct kvm_cpuid_entry2 *e;

    e = cpuid_find_entry(tdx_supported_cpuid, function, index);
    if (!e) {
        if (tdx_supported_cpuid->nent >= KVM_MAX_CPUID_ENTRIES) {
            error_report("tdx_supported_cpuid requires more space than %d entries",
                          KVM_MAX_CPUID_ENTRIES);
            exit(1);
        }
        e = &tdx_supported_cpuid->entries[tdx_supported_cpuid->nent++];
        e->function = function;
        e->index = index;
    }

    return e;
}

static void tdx_add_supported_cpuid_by_fixed1_bits(void)
{
    struct kvm_cpuid_entry2 *e, *e1;
    int i;

    for (i = 0; i < tdx_fixed1_bits.cpuid.nent; i++) {
        e = &tdx_fixed1_bits.entries[i];

        e1 = find_in_supported_entry(e->function, e->index);
        e1->eax |= e->eax;
        e1->ebx |= e->ebx;
        e1->ecx |= e->ecx;
        e1->edx |= e->edx;
    }
}

static void tdx_add_supported_cpuid_by_attrs(void)
{
    struct kvm_cpuid_entry2 *e;
    TdxAttrsMap *map;
    int i;

    for (i = 0; i < ARRAY_SIZE(tdx_attrs_maps); i++) {
        map = &tdx_attrs_maps[i];
        if (!((1ULL << map->attr_index) & tdx_caps->supported_attrs)) {
            continue;
        }

        e = find_in_supported_entry(map->cpuid_leaf, map->cpuid_subleaf);

        switch (map->cpuid_reg) {
        case R_EAX:
            e->eax |= map->feat_mask;
            break;
        case R_EBX:
            e->ebx |= map->feat_mask;
            break;
        case R_ECX:
            e->ecx |= map->feat_mask;
            break;
        case R_EDX:
            e->edx |= map->feat_mask;
            break;
        }
    }
}

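/*
 * Mark as supported the CPUID feature bits that become available when the
 * corresponding XFAM bit is supported, plus the XCR0/XSS state bits that
 * leaf 0xD derives directly from tdx_caps->supported_xfam.
 */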
static void tdx_add_supported_cpuid_by_xfam(void)
{
    struct kvm_cpuid_entry2 *e;
    int i;

    const TdxXFAMDep *xfam_dep;
    const FeatureWordInfo *f;
    for (i = 0; i < ARRAY_SIZE(tdx_xfam_deps); i++) {
        xfam_dep = &tdx_xfam_deps[i];
        if (!((1ULL << xfam_dep->xfam_bit) & tdx_caps->supported_xfam)) {
            continue;
        }

        f = &feature_word_info[xfam_dep->feat_mask.index];
        if (f->type != CPUID_FEATURE_WORD) {
            continue;
        }

        e = find_in_supported_entry(f->cpuid.eax, f->cpuid.ecx);
        switch (f->cpuid.reg) {
        case R_EAX:
            e->eax |= xfam_dep->feat_mask.mask;
            break;
        case R_EBX:
            e->ebx |= xfam_dep->feat_mask.mask;
            break;
        case R_ECX:
            e->ecx |= xfam_dep->feat_mask.mask;
            break;
        case R_EDX:
            e->edx |= xfam_dep->feat_mask.mask;
            break;
        }
    }

    e = find_in_supported_entry(0xd, 0);
    e->eax |= (tdx_caps->supported_xfam & CPUID_XSTATE_XCR0_MASK);
    e->edx |= (tdx_caps->supported_xfam & CPUID_XSTATE_XCR0_MASK) >> 32;

    e = find_in_supported_entry(0xd, 1);
    /*
     * Mark XFD as always supported for TDX; it is cleared at the end in
     * tdx_adjust_cpuid_features() if XFD is unavailable on the hardware,
     * because in that case the original data has it as 0.
     */
    e->eax |= CPUID_XSAVE_XFD;
    e->ecx |= (tdx_caps->supported_xfam & CPUID_XSTATE_XSS_MASK);
    e->edx |= (tdx_caps->supported_xfam & CPUID_XSTATE_XSS_MASK) >> 32;
}

static void tdx_add_supported_kvm_features(void)
{
    struct kvm_cpuid_entry2 *e;

    e = find_in_supported_entry(0x40000001, 0);
    e->eax = TDX_SUPPORTED_KVM_FEATURES;
}

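/*
 * Build the global tdx_supported_cpuid table once: start from KVM's
 * configurable bits in tdx_caps, then add the fixed1 bits, the attribute-
 * and XFAM-derived bits, and the KVM PV feature bits.
 */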
static void tdx_setup_supported_cpuid(void)
{
    if (tdx_supported_cpuid) {
        return;
    }

    tdx_supported_cpuid = g_malloc0(sizeof(*tdx_supported_cpuid) +
                    KVM_MAX_CPUID_ENTRIES * sizeof(struct kvm_cpuid_entry2));

    memcpy(tdx_supported_cpuid->entries, tdx_caps->cpuid.entries,
           tdx_caps->cpuid.nent * sizeof(struct kvm_cpuid_entry2));
    tdx_supported_cpuid->nent = tdx_caps->cpuid.nent;

    tdx_add_supported_cpuid_by_fixed1_bits();
    tdx_add_supported_cpuid_by_attrs();
    tdx_add_supported_cpuid_by_xfam();

    tdx_add_supported_kvm_features();
}

static int tdx_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
{
    MachineState *ms = MACHINE(qdev_get_machine());
    X86MachineState *x86ms = X86_MACHINE(ms);
    TdxGuest *tdx = TDX_GUEST(cgs);
    int r = 0;

    kvm_mark_guest_state_protected();

    if (x86ms->smm == ON_OFF_AUTO_AUTO) {
        x86ms->smm = ON_OFF_AUTO_OFF;
    } else if (x86ms->smm == ON_OFF_AUTO_ON) {
        error_setg(errp, "TDX VM doesn't support SMM");
        return -EINVAL;
    }

    if (x86ms->pic == ON_OFF_AUTO_AUTO) {
        x86ms->pic = ON_OFF_AUTO_OFF;
    } else if (x86ms->pic == ON_OFF_AUTO_ON) {
        error_setg(errp, "TDX VM doesn't support PIC");
        return -EINVAL;
    }

    if (kvm_state->kernel_irqchip_split == ON_OFF_AUTO_AUTO) {
        kvm_state->kernel_irqchip_split = ON_OFF_AUTO_ON;
    } else if (kvm_state->kernel_irqchip_split != ON_OFF_AUTO_ON) {
        error_setg(errp, "TDX VM requires kernel_irqchip to be split");
        return -EINVAL;
    }

    if (!tdx_caps) {
        r = get_tdx_capabilities(errp);
        if (r) {
            return r;
        }
    }

    tdx_setup_supported_cpuid();

    /* TDX relies on KVM_HC_MAP_GPA_RANGE to handle TDG.VP.VMCALL<MapGPA> */
    if (!kvm_enable_hypercall(BIT_ULL(KVM_HC_MAP_GPA_RANGE))) {
        return -EOPNOTSUPP;
    }

    /*
     * Set kvm_readonly_mem_allowed to false, because TDX only supports
     * read-only memory for shared memory, not for private memory.  Besides,
     * whether a memslot is private or shared is not determined by QEMU.
     *
     * Thus, just mark read-only memory as not supported for simplicity.
     */
    kvm_readonly_mem_allowed = false;

    qemu_add_machine_init_done_notifier(&tdx_machine_done_notify);

    tdx_guest = tdx;
    return 0;
}

static int tdx_kvm_type(X86ConfidentialGuest *cg)
{
    /* Do the object check */
    TDX_GUEST(cg);

    return KVM_X86_TDX_VM;
}

static void tdx_cpu_instance_init(X86ConfidentialGuest *cg, CPUState *cpu)
{
    X86CPU *x86cpu = X86_CPU(cpu);

    object_property_set_bool(OBJECT(cpu), "pmu", false, &error_abort);

    /* invtsc is fixed1 for TD guest */
    object_property_set_bool(OBJECT(cpu), "invtsc", true, &error_abort);

    x86cpu->enable_cpuid_0x1f = true;
}

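/*
 * OR in the fixed1 bits for the leaf and, for feature-word leaves, mask the
 * value against tdx_supported_cpuid so only TDX-supported bits survive.
 */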
static uint32_t tdx_adjust_cpuid_features(X86ConfidentialGuest *cg,
                                          uint32_t feature, uint32_t index,
                                          int reg, uint32_t value)
{
    struct kvm_cpuid_entry2 *e;

    e = cpuid_find_entry(&tdx_fixed1_bits.cpuid, feature, index);
    if (e) {
        value |= cpuid_entry_get_reg(e, reg);
    }

    if (is_feature_word_cpuid(feature, index, reg)) {
        e = cpuid_find_entry(tdx_supported_cpuid, feature, index);
        if (e) {
            value &= cpuid_entry_get_reg(e, reg);
        }
    }

    return value;
}

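/*
 * Fetch the vCPU's effective CPUID configuration via KVM_TDX_GET_CPUID,
 * retrying with the entry count KVM reports back when the buffer is too
 * small.  Returns NULL and sets *ret on failure.
 */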
static struct kvm_cpuid2 *tdx_fetch_cpuid(CPUState *cpu, int *ret)
{
    struct kvm_cpuid2 *fetch_cpuid;
    int size = KVM_MAX_CPUID_ENTRIES;
    Error *local_err = NULL;
    int r;

    do {
        error_free(local_err);
        local_err = NULL;

        fetch_cpuid = g_malloc0(sizeof(*fetch_cpuid) +
                                sizeof(struct kvm_cpuid_entry2) * size);
        fetch_cpuid->nent = size;
        r = tdx_vcpu_ioctl(cpu, KVM_TDX_GET_CPUID, 0, fetch_cpuid, &local_err);
        if (r == -E2BIG) {
            /* KVM updated nent to the required size; read it before freeing */
            size = fetch_cpuid->nent;
            g_free(fetch_cpuid);
        }
    } while (r == -E2BIG);

    if (r < 0) {
        error_report_err(local_err);
        *ret = r;
        return NULL;
    }

    return fetch_cpuid;
}

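/*
 * Compare the features the user requested against what the TD actually gets
 * (as reported by KVM_TDX_GET_CPUID), warning about unavailable and
 * forcibly-set bits.  With enforce_cpuid set, any mismatch is an error; the
 * guest's physical address bits must also match the host's.
 */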
static int tdx_check_features(X86ConfidentialGuest *cg, CPUState *cs)
{
    uint64_t actual, requested, unavailable, forced_on;
    g_autofree struct kvm_cpuid2 *fetch_cpuid;
    const char *forced_on_prefix = NULL;
    const char *unav_prefix = NULL;
    struct kvm_cpuid_entry2 *entry;
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;
    FeatureWordInfo *wi;
    FeatureWord w;
    bool mismatch = false;
    int r;

    fetch_cpuid = tdx_fetch_cpuid(cs, &r);
    if (!fetch_cpuid) {
        return r;
    }

    if (cpu->check_cpuid || cpu->enforce_cpuid) {
        unav_prefix = "TDX doesn't support requested feature";
        forced_on_prefix = "TDX forcibly sets the feature";
    }

    for (w = 0; w < FEATURE_WORDS; w++) {
        wi = &feature_word_info[w];
        actual = 0;

        switch (wi->type) {
        case CPUID_FEATURE_WORD:
            entry = cpuid_find_entry(fetch_cpuid, wi->cpuid.eax, wi->cpuid.ecx);
            if (!entry) {
                /*
                 * If KVM doesn't report the leaf, it is totally
                 * configurable by QEMU.
                 */
                continue;
            }

            actual = cpuid_entry_get_reg(entry, wi->cpuid.reg);
            break;
        case MSR_FEATURE_WORD:
            /*
             * TODO:
             * validate MSR features once KVM has an interface to report them.
             */
            continue;
        }

        /* Fixup for special cases */
        switch (w) {
        case FEAT_8000_0001_EDX:
            /*
             * Intel enumerates the SYSCALL bit as 1 only when the processor
             * is in 64-bit mode, and the vCPU is not in 64-bit mode before
             * it starts running.
             */
            actual |= CPUID_EXT2_SYSCALL;
            break;
        default:
            break;
        }

        requested = env->features[w];
        unavailable = requested & ~actual;
        mark_unavailable_features(cpu, w, unavailable, unav_prefix);
        if (unavailable) {
            mismatch = true;
        }

        forced_on = actual & ~requested;
        mark_forced_on_features(cpu, w, forced_on, forced_on_prefix);
        if (forced_on) {
            mismatch = true;
        }
    }

    if (cpu->enforce_cpuid && mismatch) {
        return -EINVAL;
    }

    if (cpu->phys_bits != host_cpu_phys_bits()) {
        error_report("TDX requires guest CPU physical bits (%u) "
                     "to match host CPU physical bits (%u)",
                     cpu->phys_bits, host_cpu_phys_bits());
        return -EINVAL;
    }

    return 0;
}

static int tdx_validate_attributes(TdxGuest *tdx, Error **errp)
{
    if ((tdx->attributes & ~tdx_caps->supported_attrs)) {
        error_setg(errp, "Invalid attributes 0x%"PRIx64" for TDX VM "
                   "(KVM supported: 0x%"PRIx64")", tdx->attributes,
                   (uint64_t)tdx_caps->supported_attrs);
        return -1;
    }

    if (tdx->attributes & ~TDX_SUPPORTED_TD_ATTRS) {
        error_setg(errp, "Requested TD attributes 0x%"PRIx64" contain bits "
                   "not supported by QEMU (QEMU supported: 0x%"PRIx64")",
                   tdx->attributes, (uint64_t)TDX_SUPPORTED_TD_ATTRS);
        return -1;
    }

    return 0;
}

static int setup_td_guest_attributes(X86CPU *x86cpu, Error **errp)
{
    CPUX86State *env = &x86cpu->env;

    tdx_guest->attributes |= (env->features[FEAT_7_0_ECX] & CPUID_7_0_ECX_PKS) ?
                             TDX_TD_ATTRIBUTES_PKS : 0;
    tdx_guest->attributes |= x86cpu->enable_pmu ? TDX_TD_ATTRIBUTES_PERFMON : 0;

    return tdx_validate_attributes(tdx_guest, errp);
}

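/*
 * Derive the TD's XFAM from the configured XCR0/XSS feature words and check
 * it against the XFAM bits the platform supports.
 */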
static int setup_td_xfam(X86CPU *x86cpu, Error **errp)
{
    CPUX86State *env = &x86cpu->env;
    uint64_t xfam;

    xfam = env->features[FEAT_XSAVE_XCR0_LO] |
           env->features[FEAT_XSAVE_XCR0_HI] |
           env->features[FEAT_XSAVE_XSS_LO] |
           env->features[FEAT_XSAVE_XSS_HI];

    if (xfam & ~tdx_caps->supported_xfam) {
        error_setg(errp, "Invalid XFAM 0x%"PRIx64" for TDX VM (supported: 0x%"PRIx64")",
                   xfam, (uint64_t)tdx_caps->supported_xfam);
        return -1;
    }

    tdx_guest->xfam = xfam;
    return 0;
}

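/*
 * Drop CPUID leaves that KVM does not list as directly configurable for TDX
 * and mask the remaining leaves against the configurable bits in tdx_caps.
 */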
static void tdx_filter_cpuid(struct kvm_cpuid2 *cpuids)
{
    int i, dest_cnt = 0;
    struct kvm_cpuid_entry2 *src, *dest, *conf;

    for (i = 0; i < cpuids->nent; i++) {
        src = cpuids->entries + i;
        conf = cpuid_find_entry(&tdx_caps->cpuid, src->function, src->index);
        if (!conf) {
            continue;
        }
        dest = cpuids->entries + dest_cnt;

        dest->function = src->function;
        dest->index = src->index;
        dest->flags = src->flags;
        dest->eax = src->eax & conf->eax;
        dest->ebx = src->ebx & conf->ebx;
        dest->ecx = src->ecx & conf->ecx;
        dest->edx = src->edx & conf->edx;

        dest_cnt++;
    }
    cpuids->nent = dest_cnt;
}

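/*
 * One-shot TD-scope initialization, performed before the first vCPU is
 * created: program the APIC bus and TSC frequencies, decode the optional
 * measurement registers, derive the attributes and XFAM, build the filtered
 * CPUID set, and issue KVM_TDX_INIT_VM.
 */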
int tdx_pre_create_vcpu(CPUState *cpu, Error **errp)
{
    X86CPU *x86cpu = X86_CPU(cpu);
    CPUX86State *env = &x86cpu->env;
    g_autofree struct kvm_tdx_init_vm *init_vm = NULL;
    Error *local_err = NULL;
    size_t data_len;
    int retry = 10000;
    int r = 0;

    QEMU_LOCK_GUARD(&tdx_guest->lock);
    if (tdx_guest->initialized) {
        return r;
    }

    init_vm = g_malloc0(sizeof(struct kvm_tdx_init_vm) +
                        sizeof(struct kvm_cpuid_entry2) * KVM_MAX_CPUID_ENTRIES);

    if (!kvm_check_extension(kvm_state, KVM_CAP_X86_APIC_BUS_CYCLES_NS)) {
        error_setg(errp, "KVM doesn't support KVM_CAP_X86_APIC_BUS_CYCLES_NS");
        return -EOPNOTSUPP;
    }

    r = kvm_vm_enable_cap(kvm_state, KVM_CAP_X86_APIC_BUS_CYCLES_NS,
                          0, TDX_APIC_BUS_CYCLES_NS);
    if (r < 0) {
        error_setg_errno(errp, -r,
                         "Unable to set core crystal clock frequency to 25MHz");
        return r;
    }

    if (env->tsc_khz && (env->tsc_khz < TDX_MIN_TSC_FREQUENCY_KHZ ||
                         env->tsc_khz > TDX_MAX_TSC_FREQUENCY_KHZ)) {
        error_setg(errp, "Invalid TSC %"PRId64" kHz, must specify cpu_frequency "
                         "between [%d, %d] kHz", env->tsc_khz,
                         TDX_MIN_TSC_FREQUENCY_KHZ, TDX_MAX_TSC_FREQUENCY_KHZ);
        return -EINVAL;
    }

    if (env->tsc_khz % (25 * 1000)) {
        error_setg(errp, "Invalid TSC %"PRId64" kHz, it must be a multiple of 25MHz",
                   env->tsc_khz);
        return -EINVAL;
    }

    /* It's safe even if env->tsc_khz is 0; KVM uses the host's tsc_khz in that case */
    r = kvm_vm_ioctl(kvm_state, KVM_SET_TSC_KHZ, env->tsc_khz);
    if (r < 0) {
        error_setg_errno(errp, -r, "Unable to set TSC frequency to %"PRId64" kHz",
                         env->tsc_khz);
        return r;
    }

    if (tdx_guest->mrconfigid) {
        g_autofree uint8_t *data = qbase64_decode(tdx_guest->mrconfigid,
                              strlen(tdx_guest->mrconfigid), &data_len, errp);
        if (!data) {
            return -1;
        }
        if (data_len != QCRYPTO_HASH_DIGEST_LEN_SHA384) {
            error_setg(errp, "TDX: mrconfigid is not a SHA-384 digest");
            return -1;
        }
        memcpy(init_vm->mrconfigid, data, data_len);
    }

    if (tdx_guest->mrowner) {
        g_autofree uint8_t *data = qbase64_decode(tdx_guest->mrowner,
                              strlen(tdx_guest->mrowner), &data_len, errp);
        if (!data) {
            return -1;
        }
        if (data_len != QCRYPTO_HASH_DIGEST_LEN_SHA384) {
            error_setg(errp, "TDX: mrowner is not a SHA-384 digest");
            return -1;
        }
        memcpy(init_vm->mrowner, data, data_len);
    }

    if (tdx_guest->mrownerconfig) {
        g_autofree uint8_t *data = qbase64_decode(tdx_guest->mrownerconfig,
                            strlen(tdx_guest->mrownerconfig), &data_len, errp);
        if (!data) {
            return -1;
        }
        if (data_len != QCRYPTO_HASH_DIGEST_LEN_SHA384) {
            error_setg(errp, "TDX: mrownerconfig is not a SHA-384 digest");
            return -1;
        }
        memcpy(init_vm->mrownerconfig, data, data_len);
    }

    r = setup_td_guest_attributes(x86cpu, errp);
    if (r) {
        return r;
    }

    r = setup_td_xfam(x86cpu, errp);
    if (r) {
        return r;
    }

    init_vm->cpuid.nent = kvm_x86_build_cpuid(env, init_vm->cpuid.entries, 0);
    tdx_filter_cpuid(&init_vm->cpuid);

    init_vm->attributes = tdx_guest->attributes;
    init_vm->xfam = tdx_guest->xfam;

    /*
     * KVM_TDX_INIT_VM fails with -EAGAIN when the KVM-side
     * SEAMCALL(TDH_MNG_CREATE) returns TDX_RND_NO_ENTROPY because the
     * random number generator (e.g., RDRAND or RDSEED) is busy.
     *
     * Retry in that case.
     */
    do {
        error_free(local_err);
        local_err = NULL;
        r = tdx_vm_ioctl(KVM_TDX_INIT_VM, 0, init_vm, &local_err);
    } while (r == -EAGAIN && --retry);

    if (r < 0) {
        if (!retry) {
            error_append_hint(&local_err, "The hardware RNG (Random Number "
                              "Generator) is kept busy (via RDRAND/RDSEED), "
                              "possibly maliciously, so KVM_TDX_INIT_VM keeps "
                              "failing due to lack of entropy.\n");
        }
        error_propagate(errp, local_err);
        return r;
    }

    tdx_guest->initialized = true;

    return 0;
}

int tdx_parse_tdvf(void *flash_ptr, int size)
{
    return tdvf_parse_metadata(&tdx_guest->tdvf, flash_ptr, size);
}

static void tdx_panicked_on_fatal_error(X86CPU *cpu, uint64_t error_code,
                                        char *message, uint64_t gpa)
{
    GuestPanicInformation *panic_info;

    panic_info = g_new0(GuestPanicInformation, 1);
    panic_info->type = GUEST_PANIC_INFORMATION_TYPE_TDX;
    panic_info->u.tdx.error_code = (uint32_t) error_code;
    panic_info->u.tdx.message = message;
    panic_info->u.tdx.gpa = gpa;

    qemu_system_guest_panicked(panic_info);
}

/*
 * Only 8 registers can contain a valid ASCII byte stream to form the fatal
 * message, and their sequence is: R14, R15, RBX, RDI, RSI, R8, R9, RDX
 */
#define TDX_FATAL_MESSAGE_MAX        64

#define TDX_REPORT_FATAL_ERROR_GPA_VALID    BIT_ULL(63)

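/*
 * Handle the TDG.VP.VMCALL<ReportFatalError> exit: rebuild the optional
 * ASCII message from the registers selected by the register mask in RCX,
 * then report a guest panic with the error code, message, and optional GPA.
 */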
int tdx_handle_report_fatal_error(X86CPU *cpu, struct kvm_run *run)
{
    uint64_t error_code = run->system_event.data[R_R12];
    uint64_t reg_mask = run->system_event.data[R_ECX];
    char *message = NULL;
    uint64_t *tmp;
    uint64_t gpa = -1ull;

    if (error_code & 0xffff) {
        error_report("TDX: REPORT_FATAL_ERROR: invalid error code: 0x%"PRIx64,
                     error_code);
        return -1;
    }

    if (reg_mask) {
        message = g_malloc0(TDX_FATAL_MESSAGE_MAX + 1);
        tmp = (uint64_t *)message;

#define COPY_REG(REG)                               \
    do {                                            \
        if (reg_mask & BIT_ULL(REG)) {              \
            *(tmp++) = run->system_event.data[REG]; \
        }                                           \
    } while (0)

        COPY_REG(R_R14);
        COPY_REG(R_R15);
        COPY_REG(R_EBX);
        COPY_REG(R_EDI);
        COPY_REG(R_ESI);
        COPY_REG(R_R8);
        COPY_REG(R_R9);
        COPY_REG(R_EDX);
        *((char *)tmp) = '\0';
    }
#undef COPY_REG

    if (error_code & TDX_REPORT_FATAL_ERROR_GPA_VALID) {
        gpa = run->system_event.data[R_R13];
    }

    tdx_panicked_on_fatal_error(cpu, error_code, message, gpa);

    return -1;
}

static bool tdx_guest_get_sept_ve_disable(Object *obj, Error **errp)
{
    TdxGuest *tdx = TDX_GUEST(obj);

    return !!(tdx->attributes & TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE);
}

static void tdx_guest_set_sept_ve_disable(Object *obj, bool value, Error **errp)
{
    TdxGuest *tdx = TDX_GUEST(obj);

    if (value) {
        tdx->attributes |= TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE;
    } else {
        tdx->attributes &= ~TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE;
    }
}

static char *tdx_guest_get_mrconfigid(Object *obj, Error **errp)
{
    TdxGuest *tdx = TDX_GUEST(obj);

    return g_strdup(tdx->mrconfigid);
}

static void tdx_guest_set_mrconfigid(Object *obj, const char *value, Error **errp)
{
    TdxGuest *tdx = TDX_GUEST(obj);

    g_free(tdx->mrconfigid);
    tdx->mrconfigid = g_strdup(value);
}

static char *tdx_guest_get_mrowner(Object *obj, Error **errp)
{
    TdxGuest *tdx = TDX_GUEST(obj);

    return g_strdup(tdx->mrowner);
}

static void tdx_guest_set_mrowner(Object *obj, const char *value, Error **errp)
{
    TdxGuest *tdx = TDX_GUEST(obj);

    g_free(tdx->mrowner);
    tdx->mrowner = g_strdup(value);
}

static char *tdx_guest_get_mrownerconfig(Object *obj, Error **errp)
{
    TdxGuest *tdx = TDX_GUEST(obj);

    return g_strdup(tdx->mrownerconfig);
}

static void tdx_guest_set_mrownerconfig(Object *obj, const char *value, Error **errp)
{
    TdxGuest *tdx = TDX_GUEST(obj);

    g_free(tdx->mrownerconfig);
    tdx->mrownerconfig = g_strdup(value);
}

/* tdx guest */
OBJECT_DEFINE_TYPE_WITH_INTERFACES(TdxGuest,
                                   tdx_guest,
                                   TDX_GUEST,
                                   X86_CONFIDENTIAL_GUEST,
                                   { TYPE_USER_CREATABLE },
                                   { NULL })

static void tdx_guest_init(Object *obj)
{
    ConfidentialGuestSupport *cgs = CONFIDENTIAL_GUEST_SUPPORT(obj);
    TdxGuest *tdx = TDX_GUEST(obj);

    qemu_mutex_init(&tdx->lock);

    cgs->require_guest_memfd = true;
    tdx->attributes = TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE;

    object_property_add_uint64_ptr(obj, "attributes", &tdx->attributes,
                                   OBJ_PROP_FLAG_READWRITE);
    object_property_add_bool(obj, "sept-ve-disable",
                             tdx_guest_get_sept_ve_disable,
                             tdx_guest_set_sept_ve_disable);
    object_property_add_str(obj, "mrconfigid",
                            tdx_guest_get_mrconfigid,
                            tdx_guest_set_mrconfigid);
    object_property_add_str(obj, "mrowner",
                            tdx_guest_get_mrowner, tdx_guest_set_mrowner);
    object_property_add_str(obj, "mrownerconfig",
                            tdx_guest_get_mrownerconfig,
                            tdx_guest_set_mrownerconfig);
}

static void tdx_guest_finalize(Object *obj)
{
}

static void tdx_guest_class_init(ObjectClass *oc, const void *data)
{
    ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc);
    X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc);

    klass->kvm_init = tdx_kvm_init;
    x86_klass->kvm_type = tdx_kvm_type;
    x86_klass->cpu_instance_init = tdx_cpu_instance_init;
    x86_klass->adjust_cpuid_features = tdx_adjust_cpuid_features;
    x86_klass->check_features = tdx_check_features;
}