xref: /qemu/target/i386/kvm/tdx.c (revision bb45580d842530d78b58179eaf80b6331b15324e)
1 /*
2  * QEMU TDX support
3  *
4  * Copyright (c) 2025 Intel Corporation
5  *
6  * Author:
7  *      Xiaoyao Li <xiaoyao.li@intel.com>
8  *
9  * SPDX-License-Identifier: GPL-2.0-or-later
10  */
11 
12 #include "qemu/osdep.h"
13 #include "qemu/error-report.h"
14 #include "qemu/base64.h"
15 #include "qemu/mmap-alloc.h"
16 #include "qapi/error.h"
17 #include "qom/object_interfaces.h"
18 #include "crypto/hash.h"
19 #include "system/kvm_int.h"
20 #include "system/runstate.h"
21 #include "system/system.h"
22 #include "system/ramblock.h"
23 
24 #include <linux/kvm_para.h>
25 
26 #include "hw/i386/e820_memory_layout.h"
27 #include "hw/i386/tdvf.h"
28 #include "hw/i386/x86.h"
29 #include "hw/i386/tdvf-hob.h"
30 #include "kvm_i386.h"
31 #include "tdx.h"
32 
33 #define TDX_MIN_TSC_FREQUENCY_KHZ   (100 * 1000)
34 #define TDX_MAX_TSC_FREQUENCY_KHZ   (10 * 1000 * 1000)
35 
36 #define TDX_TD_ATTRIBUTES_DEBUG             BIT_ULL(0)
37 #define TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE   BIT_ULL(28)
38 #define TDX_TD_ATTRIBUTES_PKS               BIT_ULL(30)
39 #define TDX_TD_ATTRIBUTES_PERFMON           BIT_ULL(63)
40 
41 #define TDX_SUPPORTED_TD_ATTRS  (TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE |\
42                                  TDX_TD_ATTRIBUTES_PKS | \
43                                  TDX_TD_ATTRIBUTES_PERFMON)
44 
/* Singleton TdxGuest instance; set at the end of a successful tdx_kvm_init(). */
static TdxGuest *tdx_guest;

/* Cached KVM_TDX_CAPABILITIES result; filled once by get_tdx_capabilities(). */
static struct kvm_tdx_capabilities *tdx_caps;
48 
49 /* Valid after kvm_arch_init()->confidential_guest_kvm_init()->tdx_kvm_init() */
50 bool is_tdx_vm(void)
51 {
52     return !!tdx_guest;
53 }
54 
/* Target of a KVM_MEMORY_ENCRYPT_OP: the VM fd or an individual vCPU fd. */
enum tdx_ioctl_level {
    TDX_VM_IOCTL,
    TDX_VCPU_IOCTL,
};
59 
60 static int tdx_ioctl_internal(enum tdx_ioctl_level level, void *state,
61                               int cmd_id, __u32 flags, void *data,
62                               Error **errp)
63 {
64     struct kvm_tdx_cmd tdx_cmd = {};
65     int r;
66 
67     const char *tdx_ioctl_name[] = {
68         [KVM_TDX_CAPABILITIES] = "KVM_TDX_CAPABILITIES",
69         [KVM_TDX_INIT_VM] = "KVM_TDX_INIT_VM",
70         [KVM_TDX_INIT_VCPU] = "KVM_TDX_INIT_VCPU",
71         [KVM_TDX_INIT_MEM_REGION] = "KVM_TDX_INIT_MEM_REGION",
72         [KVM_TDX_FINALIZE_VM] = "KVM_TDX_FINALIZE_VM",
73         [KVM_TDX_GET_CPUID] = "KVM_TDX_GET_CPUID",
74     };
75 
76     tdx_cmd.id = cmd_id;
77     tdx_cmd.flags = flags;
78     tdx_cmd.data = (__u64)(unsigned long)data;
79 
80     switch (level) {
81     case TDX_VM_IOCTL:
82         r = kvm_vm_ioctl(kvm_state, KVM_MEMORY_ENCRYPT_OP, &tdx_cmd);
83         break;
84     case TDX_VCPU_IOCTL:
85         r = kvm_vcpu_ioctl(state, KVM_MEMORY_ENCRYPT_OP, &tdx_cmd);
86         break;
87     default:
88         error_setg(errp, "Invalid tdx_ioctl_level %d", level);
89         return -EINVAL;
90     }
91 
92     if (r < 0) {
93         error_setg_errno(errp, -r, "TDX ioctl %s failed, hw_errors: 0x%llx",
94                          tdx_ioctl_name[cmd_id], tdx_cmd.hw_error);
95     }
96     return r;
97 }
98 
99 static inline int tdx_vm_ioctl(int cmd_id, __u32 flags, void *data,
100                                Error **errp)
101 {
102     return tdx_ioctl_internal(TDX_VM_IOCTL, NULL, cmd_id, flags, data, errp);
103 }
104 
105 static inline int tdx_vcpu_ioctl(CPUState *cpu, int cmd_id, __u32 flags,
106                                  void *data, Error **errp)
107 {
108     return  tdx_ioctl_internal(TDX_VCPU_IOCTL, cpu, cmd_id, flags, data, errp);
109 }
110 
/*
 * Fetch KVM_TDX_CAPABILITIES into the file-scope tdx_caps.
 *
 * The number of CPUID entries KVM will report is unknown up front, so
 * start from the 6 entries the 1st TDX generation reports and double the
 * buffer on -E2BIG until the call succeeds, bounded by
 * KVM_MAX_CPUID_ENTRIES.
 *
 * Returns 0 on success; a negative errno with @errp set on failure.
 */
static int get_tdx_capabilities(Error **errp)
{
    struct kvm_tdx_capabilities *caps;
    /* 1st generation of TDX reports 6 cpuid configs */
    int nr_cpuid_configs = 6;
    size_t size;
    int r;

    do {
        Error *local_err = NULL;
        /* kvm_tdx_capabilities has a flexible cpuid entry array at its tail. */
        size = sizeof(struct kvm_tdx_capabilities) +
                      nr_cpuid_configs * sizeof(struct kvm_cpuid_entry2);
        caps = g_malloc0(size);
        caps->cpuid.nent = nr_cpuid_configs;

        r = tdx_vm_ioctl(KVM_TDX_CAPABILITIES, 0, caps, &local_err);
        if (r == -E2BIG) {
            /* Buffer too small: grow and retry, unless the limit is hit. */
            g_free(caps);
            nr_cpuid_configs *= 2;
            if (nr_cpuid_configs > KVM_MAX_CPUID_ENTRIES) {
                error_report("KVM TDX seems broken that number of CPUID entries"
                             " in kvm_tdx_capabilities exceeds limit: %d",
                             KVM_MAX_CPUID_ENTRIES);
                error_propagate(errp, local_err);
                return r;
            }
            error_free(local_err);
        } else if (r < 0) {
            g_free(caps);
            error_propagate(errp, local_err);
            return r;
        }
    } while (r == -E2BIG);

    tdx_caps = caps;

    return 0;
}
149 
150 void tdx_set_tdvf_region(MemoryRegion *tdvf_mr)
151 {
152     assert(!tdx_guest->tdvf_mr);
153     tdx_guest->tdvf_mr = tdvf_mr;
154 }
155 
/*
 * Locate the TD_HOB section in the parsed TDVF metadata.
 * A TD_HOB section is mandatory for TDX boot; exit fatally if the
 * firmware metadata doesn't declare one.
 */
static TdxFirmwareEntry *tdx_get_hob_entry(TdxGuest *tdx)
{
    TdxFirmwareEntry *entry;

    for_each_tdx_fw_entry(&tdx->tdvf, entry) {
        if (entry->type == TDVF_SECTION_TYPE_TD_HOB) {
            return entry;
        }
    }
    error_report("TDVF metadata doesn't specify TD_HOB location.");
    exit(1);
}
168 
169 static void tdx_add_ram_entry(uint64_t address, uint64_t length,
170                               enum TdxRamType type)
171 {
172     uint32_t nr_entries = tdx_guest->nr_ram_entries;
173     tdx_guest->ram_entries = g_renew(TdxRamEntry, tdx_guest->ram_entries,
174                                      nr_entries + 1);
175 
176     tdx_guest->ram_entries[nr_entries].address = address;
177     tdx_guest->ram_entries[nr_entries].length = length;
178     tdx_guest->ram_entries[nr_entries].type = type;
179     tdx_guest->nr_ram_entries++;
180 }
181 
/*
 * Mark the guest RAM range [address, address + length) as TDX_RAM_ADDED.
 *
 * The range must be fully contained within a single existing RAM entry.
 * When the containing entry is larger than the range, it is carved up:
 * the entry is shrunk to exactly the accepted range, and new
 * TDX_RAM_UNACCEPTED entries are appended for any leftover head and/or
 * tail portions.
 *
 * Returns 0 on success (or if the entry was already TDX_RAM_ADDED),
 * -1 if no single entry fully contains the range.
 */
static int tdx_accept_ram_range(uint64_t address, uint64_t length)
{
    uint64_t head_start, tail_start, head_length, tail_length;
    uint64_t tmp_address, tmp_length;
    TdxRamEntry *e;
    int i = 0;

    /* Find the first RAM entry that overlaps the requested range. */
    do {
        if (i == tdx_guest->nr_ram_entries) {
            return -1;
        }

        e = &tdx_guest->ram_entries[i++];
    } while (address + length <= e->address || address >= e->address + e->length);

    /*
     * The to-be-accepted ram range must be fully contained by one
     * RAM entry.
     */
    if (e->address > address ||
        e->address + e->length < address + length) {
        return -1;
    }

    if (e->type == TDX_RAM_ADDED) {
        return 0;
    }

    /* Remember the original extent before shrinking the entry in place. */
    tmp_address = e->address;
    tmp_length = e->length;

    e->address = address;
    e->length = length;
    e->type = TDX_RAM_ADDED;

    /* Unaccepted space left before the accepted range. */
    head_length = address - tmp_address;
    if (head_length > 0) {
        head_start = tmp_address;
        tdx_add_ram_entry(head_start, head_length, TDX_RAM_UNACCEPTED);
    }

    /* Unaccepted space left after the accepted range. */
    tail_start = address + length;
    if (tail_start < tmp_address + tmp_length) {
        tail_length = tmp_address + tmp_length - tail_start;
        tdx_add_ram_entry(tail_start, tail_length, TDX_RAM_UNACCEPTED);
    }

    return 0;
}
231 
232 static int tdx_ram_entry_compare(const void *lhs_, const void* rhs_)
233 {
234     const TdxRamEntry *lhs = lhs_;
235     const TdxRamEntry *rhs = rhs_;
236 
237     if (lhs->address == rhs->address) {
238         return 0;
239     }
240     if (le64_to_cpu(lhs->address) > le64_to_cpu(rhs->address)) {
241         return 1;
242     }
243     return -1;
244 }
245 
246 static void tdx_init_ram_entries(void)
247 {
248     unsigned i, j, nr_e820_entries;
249 
250     nr_e820_entries = e820_get_table(NULL);
251     tdx_guest->ram_entries = g_new(TdxRamEntry, nr_e820_entries);
252 
253     for (i = 0, j = 0; i < nr_e820_entries; i++) {
254         uint64_t addr, len;
255 
256         if (e820_get_entry(i, E820_RAM, &addr, &len)) {
257             tdx_guest->ram_entries[j].address = addr;
258             tdx_guest->ram_entries[j].length = len;
259             tdx_guest->ram_entries[j].type = TDX_RAM_UNACCEPTED;
260             j++;
261         }
262     }
263     tdx_guest->nr_ram_entries = j;
264 }
265 
266 static void tdx_post_init_vcpus(void)
267 {
268     TdxFirmwareEntry *hob;
269     CPUState *cpu;
270 
271     hob = tdx_get_hob_entry(tdx_guest);
272     CPU_FOREACH(cpu) {
273         tdx_vcpu_ioctl(cpu, KVM_TDX_INIT_VCPU, 0, (void *)hob->address,
274                        &error_fatal);
275     }
276 }
277 
/*
 * Machine-init-done hook: populate and measure the TD's initial memory.
 *
 * Builds the RAM entry table from e820, maps backing pages for the TDVF
 * sections, creates the TD HOB, initializes the vCPUs, copies each
 * firmware section into private memory via KVM_TDX_INIT_MEM_REGION
 * (optionally extending the measurement), and finally issues
 * KVM_TDX_FINALIZE_VM.  Any failure here is fatal to the VM.
 */
static void tdx_finalize_vm(Notifier *notifier, void *unused)
{
    TdxFirmware *tdvf = &tdx_guest->tdvf;
    TdxFirmwareEntry *entry;
    RAMBlock *ram_block;
    Error *local_err = NULL;
    int r;

    tdx_init_ram_entries();

    for_each_tdx_fw_entry(tdvf, entry) {
        switch (entry->type) {
        case TDVF_SECTION_TYPE_BFV:
        case TDVF_SECTION_TYPE_CFV:
            /* Firmware volumes: content comes straight from the TDVF image. */
            entry->mem_ptr = tdvf->mem_ptr + entry->data_offset;
            break;
        case TDVF_SECTION_TYPE_TD_HOB:
        case TDVF_SECTION_TYPE_TEMP_MEM:
            /* Sections without file content: back them with fresh anon pages. */
            entry->mem_ptr = qemu_ram_mmap(-1, entry->size,
                                           qemu_real_host_page_size(), 0, 0);
            if (entry->mem_ptr == MAP_FAILED) {
                error_report("Failed to mmap memory for TDVF section %d",
                             entry->type);
                exit(1);
            }
            if (tdx_accept_ram_range(entry->address, entry->size)) {
                error_report("Failed to accept memory for TDVF section %d",
                             entry->type);
                qemu_ram_munmap(-1, entry->mem_ptr, entry->size);
                exit(1);
            }
            break;
        default:
            error_report("Unsupported TDVF section %d", entry->type);
            exit(1);
        }
    }

    /* The HOB builder below expects the RAM entries sorted by address. */
    qsort(tdx_guest->ram_entries, tdx_guest->nr_ram_entries,
          sizeof(TdxRamEntry), &tdx_ram_entry_compare);

    tdvf_hob_create(tdx_guest, tdx_get_hob_entry(tdx_guest));

    tdx_post_init_vcpus();

    for_each_tdx_fw_entry(tdvf, entry) {
        struct kvm_tdx_init_mem_region region;
        uint32_t flags;

        region = (struct kvm_tdx_init_mem_region) {
            .source_addr = (uint64_t)entry->mem_ptr,
            .gpa = entry->address,
            .nr_pages = entry->size >> 12, /* 4KiB pages */
        };

        /* MR_EXTEND sections also get hashed into the TD measurement. */
        flags = entry->attributes & TDVF_SECTION_ATTRIBUTES_MR_EXTEND ?
                KVM_TDX_MEASURE_MEMORY_REGION : 0;

        /* Retry on transient interruption/contention from KVM. */
        do {
            error_free(local_err);
            local_err = NULL;
            r = tdx_vcpu_ioctl(first_cpu, KVM_TDX_INIT_MEM_REGION, flags,
                               &region, &local_err);
        } while (r == -EAGAIN || r == -EINTR);
        if (r < 0) {
            error_report_err(local_err);
            exit(1);
        }

        /* Temporary backing pages are no longer needed once copied in. */
        if (entry->type == TDVF_SECTION_TYPE_TD_HOB ||
            entry->type == TDVF_SECTION_TYPE_TEMP_MEM) {
            qemu_ram_munmap(-1, entry->mem_ptr, entry->size);
            entry->mem_ptr = NULL;
        }
    }

    /*
     * TDVF image has been copied into private region above via
     * KVM_MEMORY_MAPPING. It becomes useless.
     */
    ram_block = tdx_guest->tdvf_mr->ram_block;
    ram_block_discard_range(ram_block, 0, ram_block->max_length);

    tdx_vm_ioctl(KVM_TDX_FINALIZE_VM, 0, NULL, &error_fatal);
    CONFIDENTIAL_GUEST_SUPPORT(tdx_guest)->ready = true;
}
364 
/* Registered by tdx_kvm_init(); fires once machine init is complete. */
static Notifier tdx_machine_done_notify = {
    .notify = tdx_finalize_vm,
};
368 
/*
 * ConfidentialGuestSupport.kvm_init hook for TDX.
 *
 * Validates machine configuration (SMM and PIC must be off, kernel
 * irqchip must be split), fetches TDX capabilities from KVM, enables the
 * KVM_HC_MAP_GPA_RANGE hypercall, and registers the machine-init-done
 * notifier that later finalizes the TD.
 *
 * Returns 0 on success; a negative errno with @errp set on failure.
 */
static int tdx_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
{
    MachineState *ms = MACHINE(qdev_get_machine());
    X86MachineState *x86ms = X86_MACHINE(ms);
    TdxGuest *tdx = TDX_GUEST(cgs);
    int r = 0;

    kvm_mark_guest_state_protected();

    /* SMM defaults to off; an explicit "on" is a configuration error. */
    if (x86ms->smm == ON_OFF_AUTO_AUTO) {
        x86ms->smm = ON_OFF_AUTO_OFF;
    } else if (x86ms->smm == ON_OFF_AUTO_ON) {
        error_setg(errp, "TDX VM doesn't support SMM");
        return -EINVAL;
    }

    /* Likewise for the i8259 PIC. */
    if (x86ms->pic == ON_OFF_AUTO_AUTO) {
        x86ms->pic = ON_OFF_AUTO_OFF;
    } else if (x86ms->pic == ON_OFF_AUTO_ON) {
        error_setg(errp, "TDX VM doesn't support PIC");
        return -EINVAL;
    }

    /* TDX requires the split irqchip model. */
    if (kvm_state->kernel_irqchip_split == ON_OFF_AUTO_AUTO) {
        kvm_state->kernel_irqchip_split = ON_OFF_AUTO_ON;
    } else if (kvm_state->kernel_irqchip_split != ON_OFF_AUTO_ON) {
        error_setg(errp, "TDX VM requires kernel_irqchip to be split");
        return -EINVAL;
    }

    if (!tdx_caps) {
        r = get_tdx_capabilities(errp);
        if (r) {
            return r;
        }
    }

    /* TDX relies on KVM_HC_MAP_GPA_RANGE to handle TDG.VP.VMCALL<MapGPA> */
    if (!kvm_enable_hypercall(BIT_ULL(KVM_HC_MAP_GPA_RANGE))) {
        return -EOPNOTSUPP;
    }

    /*
     * Set kvm_readonly_mem_allowed to false, because TDX only supports readonly
     * memory for shared memory but not for private memory. Besides, whether a
     * memslot is private or shared is not determined by QEMU.
     *
     * Thus, just mark readonly memory not supported for simplicity.
     */
    kvm_readonly_mem_allowed = false;

    qemu_add_machine_init_done_notifier(&tdx_machine_done_notify);

    tdx_guest = tdx;
    return 0;
}
425 
426 static int tdx_kvm_type(X86ConfidentialGuest *cg)
427 {
428     /* Do the object check */
429     TDX_GUEST(cg);
430 
431     return KVM_X86_TDX_VM;
432 }
433 
434 static void tdx_cpu_instance_init(X86ConfidentialGuest *cg, CPUState *cpu)
435 {
436     X86CPU *x86cpu = X86_CPU(cpu);
437 
438     object_property_set_bool(OBJECT(cpu), "pmu", false, &error_abort);
439 
440     x86cpu->enable_cpuid_0x1f = true;
441 }
442 
443 static int tdx_validate_attributes(TdxGuest *tdx, Error **errp)
444 {
445     if ((tdx->attributes & ~tdx_caps->supported_attrs)) {
446         error_setg(errp, "Invalid attributes 0x%lx for TDX VM "
447                    "(KVM supported: 0x%llx)", tdx->attributes,
448                    tdx_caps->supported_attrs);
449         return -1;
450     }
451 
452     if (tdx->attributes & ~TDX_SUPPORTED_TD_ATTRS) {
453         error_setg(errp, "Some QEMU unsupported TD attribute bits being "
454                     "requested: 0x%lx (QEMU supported: 0x%llx)",
455                     tdx->attributes, TDX_SUPPORTED_TD_ATTRS);
456         return -1;
457     }
458 
459     return 0;
460 }
461 
462 static int setup_td_guest_attributes(X86CPU *x86cpu, Error **errp)
463 {
464     CPUX86State *env = &x86cpu->env;
465 
466     tdx_guest->attributes |= (env->features[FEAT_7_0_ECX] & CPUID_7_0_ECX_PKS) ?
467                              TDX_TD_ATTRIBUTES_PKS : 0;
468     tdx_guest->attributes |= x86cpu->enable_pmu ? TDX_TD_ATTRIBUTES_PERFMON : 0;
469 
470     return tdx_validate_attributes(tdx_guest, errp);
471 }
472 
473 static int setup_td_xfam(X86CPU *x86cpu, Error **errp)
474 {
475     CPUX86State *env = &x86cpu->env;
476     uint64_t xfam;
477 
478     xfam = env->features[FEAT_XSAVE_XCR0_LO] |
479            env->features[FEAT_XSAVE_XCR0_HI] |
480            env->features[FEAT_XSAVE_XSS_LO] |
481            env->features[FEAT_XSAVE_XSS_HI];
482 
483     if (xfam & ~tdx_caps->supported_xfam) {
484         error_setg(errp, "Invalid XFAM 0x%lx for TDX VM (supported: 0x%llx))",
485                    xfam, tdx_caps->supported_xfam);
486         return -1;
487     }
488 
489     tdx_guest->xfam = xfam;
490     return 0;
491 }
492 
493 static void tdx_filter_cpuid(struct kvm_cpuid2 *cpuids)
494 {
495     int i, dest_cnt = 0;
496     struct kvm_cpuid_entry2 *src, *dest, *conf;
497 
498     for (i = 0; i < cpuids->nent; i++) {
499         src = cpuids->entries + i;
500         conf = cpuid_find_entry(&tdx_caps->cpuid, src->function, src->index);
501         if (!conf) {
502             continue;
503         }
504         dest = cpuids->entries + dest_cnt;
505 
506         dest->function = src->function;
507         dest->index = src->index;
508         dest->flags = src->flags;
509         dest->eax = src->eax & conf->eax;
510         dest->ebx = src->ebx & conf->ebx;
511         dest->ecx = src->ecx & conf->ecx;
512         dest->edx = src->edx & conf->edx;
513 
514         dest_cnt++;
515     }
516     cpuids->nent = dest_cnt++;
517 }
518 
519 int tdx_pre_create_vcpu(CPUState *cpu, Error **errp)
520 {
521     X86CPU *x86cpu = X86_CPU(cpu);
522     CPUX86State *env = &x86cpu->env;
523     g_autofree struct kvm_tdx_init_vm *init_vm = NULL;
524     Error *local_err = NULL;
525     size_t data_len;
526     int retry = 10000;
527     int r = 0;
528 
529     QEMU_LOCK_GUARD(&tdx_guest->lock);
530     if (tdx_guest->initialized) {
531         return r;
532     }
533 
534     init_vm = g_malloc0(sizeof(struct kvm_tdx_init_vm) +
535                         sizeof(struct kvm_cpuid_entry2) * KVM_MAX_CPUID_ENTRIES);
536 
537     if (!kvm_check_extension(kvm_state, KVM_CAP_X86_APIC_BUS_CYCLES_NS)) {
538         error_setg(errp, "KVM doesn't support KVM_CAP_X86_APIC_BUS_CYCLES_NS");
539         return -EOPNOTSUPP;
540     }
541 
542     r = kvm_vm_enable_cap(kvm_state, KVM_CAP_X86_APIC_BUS_CYCLES_NS,
543                           0, TDX_APIC_BUS_CYCLES_NS);
544     if (r < 0) {
545         error_setg_errno(errp, -r,
546                          "Unable to set core crystal clock frequency to 25MHz");
547         return r;
548     }
549 
550     if (env->tsc_khz && (env->tsc_khz < TDX_MIN_TSC_FREQUENCY_KHZ ||
551                          env->tsc_khz > TDX_MAX_TSC_FREQUENCY_KHZ)) {
552         error_setg(errp, "Invalid TSC %ld KHz, must specify cpu_frequency "
553                          "between [%d, %d] kHz", env->tsc_khz,
554                          TDX_MIN_TSC_FREQUENCY_KHZ, TDX_MAX_TSC_FREQUENCY_KHZ);
555        return -EINVAL;
556     }
557 
558     if (env->tsc_khz % (25 * 1000)) {
559         error_setg(errp, "Invalid TSC %ld KHz, it must be multiple of 25MHz",
560                    env->tsc_khz);
561         return -EINVAL;
562     }
563 
564     /* it's safe even env->tsc_khz is 0. KVM uses host's tsc_khz in this case */
565     r = kvm_vm_ioctl(kvm_state, KVM_SET_TSC_KHZ, env->tsc_khz);
566     if (r < 0) {
567         error_setg_errno(errp, -r, "Unable to set TSC frequency to %ld kHz",
568                          env->tsc_khz);
569         return r;
570     }
571 
572     if (tdx_guest->mrconfigid) {
573         g_autofree uint8_t *data = qbase64_decode(tdx_guest->mrconfigid,
574                               strlen(tdx_guest->mrconfigid), &data_len, errp);
575         if (!data) {
576             return -1;
577         }
578         if (data_len != QCRYPTO_HASH_DIGEST_LEN_SHA384) {
579             error_setg(errp, "TDX: failed to decode mrconfigid");
580             return -1;
581         }
582         memcpy(init_vm->mrconfigid, data, data_len);
583     }
584 
585     if (tdx_guest->mrowner) {
586         g_autofree uint8_t *data = qbase64_decode(tdx_guest->mrowner,
587                               strlen(tdx_guest->mrowner), &data_len, errp);
588         if (!data) {
589             return -1;
590         }
591         if (data_len != QCRYPTO_HASH_DIGEST_LEN_SHA384) {
592             error_setg(errp, "TDX: failed to decode mrowner");
593             return -1;
594         }
595         memcpy(init_vm->mrowner, data, data_len);
596     }
597 
598     if (tdx_guest->mrownerconfig) {
599         g_autofree uint8_t *data = qbase64_decode(tdx_guest->mrownerconfig,
600                             strlen(tdx_guest->mrownerconfig), &data_len, errp);
601         if (!data) {
602             return -1;
603         }
604         if (data_len != QCRYPTO_HASH_DIGEST_LEN_SHA384) {
605             error_setg(errp, "TDX: failed to decode mrownerconfig");
606             return -1;
607         }
608         memcpy(init_vm->mrownerconfig, data, data_len);
609     }
610 
611     r = setup_td_guest_attributes(x86cpu, errp);
612     if (r) {
613         return r;
614     }
615 
616     r = setup_td_xfam(x86cpu, errp);
617     if (r) {
618         return r;
619     }
620 
621     init_vm->cpuid.nent = kvm_x86_build_cpuid(env, init_vm->cpuid.entries, 0);
622     tdx_filter_cpuid(&init_vm->cpuid);
623 
624     init_vm->attributes = tdx_guest->attributes;
625     init_vm->xfam = tdx_guest->xfam;
626 
627     /*
628      * KVM_TDX_INIT_VM gets -EAGAIN when KVM side SEAMCALL(TDH_MNG_CREATE)
629      * gets TDX_RND_NO_ENTROPY due to Random number generation (e.g., RDRAND or
630      * RDSEED) is busy.
631      *
632      * Retry for the case.
633      */
634     do {
635         error_free(local_err);
636         local_err = NULL;
637         r = tdx_vm_ioctl(KVM_TDX_INIT_VM, 0, init_vm, &local_err);
638     } while (r == -EAGAIN && --retry);
639 
640     if (r < 0) {
641         if (!retry) {
642             error_append_hint(&local_err, "Hardware RNG (Random Number "
643             "Generator) is busy occupied by someone (via RDRAND/RDSEED) "
644             "maliciously, which leads to KVM_TDX_INIT_VM keeping failure "
645             "due to lack of entropy.\n");
646         }
647         error_propagate(errp, local_err);
648         return r;
649     }
650 
651     tdx_guest->initialized = true;
652 
653     return 0;
654 }
655 
656 int tdx_parse_tdvf(void *flash_ptr, int size)
657 {
658     return tdvf_parse_metadata(&tdx_guest->tdvf, flash_ptr, size);
659 }
660 
661 static void tdx_panicked_on_fatal_error(X86CPU *cpu, uint64_t error_code,
662                                         char *message, uint64_t gpa)
663 {
664     GuestPanicInformation *panic_info;
665 
666     panic_info = g_new0(GuestPanicInformation, 1);
667     panic_info->type = GUEST_PANIC_INFORMATION_TYPE_TDX;
668     panic_info->u.tdx.error_code = (uint32_t) error_code;
669     panic_info->u.tdx.message = message;
670     panic_info->u.tdx.gpa = gpa;
671 
672     qemu_system_guest_panicked(panic_info);
673 }
674 
675 /*
676  * Only 8 registers can contain valid ASCII byte stream to form the fatal
677  * message, and their sequence is: R14, R15, RBX, RDI, RSI, R8, R9, RDX
678  */
679 #define TDX_FATAL_MESSAGE_MAX        64
680 
681 #define TDX_REPORT_FATAL_ERROR_GPA_VALID    BIT_ULL(63)
682 
683 int tdx_handle_report_fatal_error(X86CPU *cpu, struct kvm_run *run)
684 {
685     uint64_t error_code = run->system_event.data[R_R12];
686     uint64_t reg_mask = run->system_event.data[R_ECX];
687     char *message = NULL;
688     uint64_t *tmp;
689     uint64_t gpa = -1ull;
690 
691     if (error_code & 0xffff) {
692         error_report("TDX: REPORT_FATAL_ERROR: invalid error code: 0x%lx",
693                      error_code);
694         return -1;
695     }
696 
697     if (reg_mask) {
698         message = g_malloc0(TDX_FATAL_MESSAGE_MAX + 1);
699         tmp = (uint64_t *)message;
700 
701 #define COPY_REG(REG)                               \
702     do {                                            \
703         if (reg_mask & BIT_ULL(REG)) {              \
704             *(tmp++) = run->system_event.data[REG]; \
705         }                                           \
706     } while (0)
707 
708         COPY_REG(R_R14);
709         COPY_REG(R_R15);
710         COPY_REG(R_EBX);
711         COPY_REG(R_EDI);
712         COPY_REG(R_ESI);
713         COPY_REG(R_R8);
714         COPY_REG(R_R9);
715         COPY_REG(R_EDX);
716         *((char *)tmp) = '\0';
717     }
718 #undef COPY_REG
719 
720     if (error_code & TDX_REPORT_FATAL_ERROR_GPA_VALID) {
721         gpa = run->system_event.data[R_R13];
722     }
723 
724     tdx_panicked_on_fatal_error(cpu, error_code, message, gpa);
725 
726     return -1;
727 }
728 
729 static bool tdx_guest_get_sept_ve_disable(Object *obj, Error **errp)
730 {
731     TdxGuest *tdx = TDX_GUEST(obj);
732 
733     return !!(tdx->attributes & TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE);
734 }
735 
736 static void tdx_guest_set_sept_ve_disable(Object *obj, bool value, Error **errp)
737 {
738     TdxGuest *tdx = TDX_GUEST(obj);
739 
740     if (value) {
741         tdx->attributes |= TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE;
742     } else {
743         tdx->attributes &= ~TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE;
744     }
745 }
746 
747 static char *tdx_guest_get_mrconfigid(Object *obj, Error **errp)
748 {
749     TdxGuest *tdx = TDX_GUEST(obj);
750 
751     return g_strdup(tdx->mrconfigid);
752 }
753 
754 static void tdx_guest_set_mrconfigid(Object *obj, const char *value, Error **errp)
755 {
756     TdxGuest *tdx = TDX_GUEST(obj);
757 
758     g_free(tdx->mrconfigid);
759     tdx->mrconfigid = g_strdup(value);
760 }
761 
762 static char *tdx_guest_get_mrowner(Object *obj, Error **errp)
763 {
764     TdxGuest *tdx = TDX_GUEST(obj);
765 
766     return g_strdup(tdx->mrowner);
767 }
768 
769 static void tdx_guest_set_mrowner(Object *obj, const char *value, Error **errp)
770 {
771     TdxGuest *tdx = TDX_GUEST(obj);
772 
773     g_free(tdx->mrowner);
774     tdx->mrowner = g_strdup(value);
775 }
776 
777 static char *tdx_guest_get_mrownerconfig(Object *obj, Error **errp)
778 {
779     TdxGuest *tdx = TDX_GUEST(obj);
780 
781     return g_strdup(tdx->mrownerconfig);
782 }
783 
784 static void tdx_guest_set_mrownerconfig(Object *obj, const char *value, Error **errp)
785 {
786     TdxGuest *tdx = TDX_GUEST(obj);
787 
788     g_free(tdx->mrownerconfig);
789     tdx->mrownerconfig = g_strdup(value);
790 }
791 
/*
 * QOM boilerplate for the tdx-guest object type: an X86 confidential
 * guest implementing TYPE_USER_CREATABLE (so it can be built via -object).
 */
OBJECT_DEFINE_TYPE_WITH_INTERFACES(TdxGuest,
                                   tdx_guest,
                                   TDX_GUEST,
                                   X86_CONFIDENTIAL_GUEST,
                                   { TYPE_USER_CREATABLE },
                                   { NULL })
799 
/*
 * Instance initializer: set TDX defaults and register the QOM properties
 * (attributes, sept-ve-disable, and the three measurement registers).
 */
static void tdx_guest_init(Object *obj)
{
    ConfidentialGuestSupport *cgs = CONFIDENTIAL_GUEST_SUPPORT(obj);
    TdxGuest *tdx = TDX_GUEST(obj);

    /* Protects the one-time TD initialization in tdx_pre_create_vcpu(). */
    qemu_mutex_init(&tdx->lock);

    /* TDX guest memory must come from guest_memfd. */
    cgs->require_guest_memfd = true;
    /* SEPT_VE_DISABLE is on by default; toggled via "sept-ve-disable". */
    tdx->attributes = TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE;

    object_property_add_uint64_ptr(obj, "attributes", &tdx->attributes,
                                   OBJ_PROP_FLAG_READWRITE);
    object_property_add_bool(obj, "sept-ve-disable",
                             tdx_guest_get_sept_ve_disable,
                             tdx_guest_set_sept_ve_disable);
    object_property_add_str(obj, "mrconfigid",
                            tdx_guest_get_mrconfigid,
                            tdx_guest_set_mrconfigid);
    object_property_add_str(obj, "mrowner",
                            tdx_guest_get_mrowner, tdx_guest_set_mrowner);
    object_property_add_str(obj, "mrownerconfig",
                            tdx_guest_get_mrownerconfig,
                            tdx_guest_set_mrownerconfig);
}
824 
static void tdx_guest_finalize(Object *obj)
{
    /*
     * Intentionally empty.
     * NOTE(review): the mrconfigid/mrowner/mrownerconfig strings and the
     * mutex initialized in tdx_guest_init() are not released here —
     * confirm whether that is intentional (object lives until exit).
     */
}
828 
829 static void tdx_guest_class_init(ObjectClass *oc, const void *data)
830 {
831     ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc);
832     X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc);
833 
834     klass->kvm_init = tdx_kvm_init;
835     x86_klass->kvm_type = tdx_kvm_type;
836     x86_klass->cpu_instance_init = tdx_cpu_instance_init;
837 }
838