1756e12e7SXiaoyao Li /*
2756e12e7SXiaoyao Li * QEMU TDX support
3756e12e7SXiaoyao Li *
4756e12e7SXiaoyao Li * Copyright (c) 2025 Intel Corporation
5756e12e7SXiaoyao Li *
6756e12e7SXiaoyao Li * Author:
7756e12e7SXiaoyao Li * Xiaoyao Li <xiaoyao.li@intel.com>
8756e12e7SXiaoyao Li *
9756e12e7SXiaoyao Li * SPDX-License-Identifier: GPL-2.0-or-later
10756e12e7SXiaoyao Li */
11756e12e7SXiaoyao Li
12756e12e7SXiaoyao Li #include "qemu/osdep.h"
138eddedc3SXiaoyao Li #include "qemu/error-report.h"
14d05a0858SIsaku Yamahata #include "qemu/base64.h"
154420ba0eSXiaoyao Li #include "qemu/mmap-alloc.h"
168eddedc3SXiaoyao Li #include "qapi/error.h"
17756e12e7SXiaoyao Li #include "qom/object_interfaces.h"
18d05a0858SIsaku Yamahata #include "crypto/hash.h"
19bb45580dSXiaoyao Li #include "system/kvm_int.h"
206e250463SXiaoyao Li #include "system/runstate.h"
214420ba0eSXiaoyao Li #include "system/system.h"
22ebc2d2b4SIsaku Yamahata #include "system/ramblock.h"
23756e12e7SXiaoyao Li
241ff5048dSXiaoyao Li #include <linux/kvm_para.h>
251ff5048dSXiaoyao Li
268c94c84cSXiaoyao Li #include "cpu.h"
278c94c84cSXiaoyao Li #include "cpu-internal.h"
28907ee7b6SXiaoyao Li #include "host-cpu.h"
29f18672e4SXiaoyao Li #include "hw/i386/e820_memory_layout.h"
304420ba0eSXiaoyao Li #include "hw/i386/tdvf.h"
31631a2ac5SXiaoyao Li #include "hw/i386/x86.h"
32a7314259SXiaoyao Li #include "hw/i386/tdvf-hob.h"
33b455880eSXiaoyao Li #include "kvm_i386.h"
34756e12e7SXiaoyao Li #include "tdx.h"
35756e12e7SXiaoyao Li
364d6e288aSXiaoyao Li #include "standard-headers/asm-x86/kvm_para.h"
374d6e288aSXiaoyao Li
/* Lower and upper bound for a TSC frequency, expressed in kHz. */
#define TDX_MIN_TSC_FREQUENCY_KHZ   (100 * 1000)
#define TDX_MAX_TSC_FREQUENCY_KHZ   (10 * 1000 * 1000)

/* Individual TD attribute bits. */
#define TDX_TD_ATTRIBUTES_DEBUG             BIT_ULL(0)
#define TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE   BIT_ULL(28)
#define TDX_TD_ATTRIBUTES_PKS               BIT_ULL(30)
#define TDX_TD_ATTRIBUTES_PERFMON           BIT_ULL(63)

/* Union of the TD attribute bits listed as supported (DEBUG is not). */
#define TDX_SUPPORTED_TD_ATTRS  (TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE | \
                                 TDX_TD_ATTRIBUTES_PKS | \
                                 TDX_TD_ATTRIBUTES_PERFMON)

/*
 * KVM feature bits stored in EAX of CPUID leaf 0x40000001 by
 * tdx_add_supported_kvm_features().
 */
#define TDX_SUPPORTED_KVM_FEATURES  ((1U << KVM_FEATURE_NOP_IO_DELAY) | \
                                     (1U << KVM_FEATURE_PV_UNHALT) | \
                                     (1U << KVM_FEATURE_PV_TLB_FLUSH) | \
                                     (1U << KVM_FEATURE_PV_SEND_IPI) | \
                                     (1U << KVM_FEATURE_POLL_CONTROL) | \
                                     (1U << KVM_FEATURE_PV_SCHED_YIELD) | \
                                     (1U << KVM_FEATURE_MSI_EXT_DEST_ID))
574d6e288aSXiaoyao Li
581619d0e4SXiaoyao Li static TdxGuest *tdx_guest;
591619d0e4SXiaoyao Li
608eddedc3SXiaoyao Li static struct kvm_tdx_capabilities *tdx_caps;
6175ec6189SXiaoyao Li static struct kvm_cpuid2 *tdx_supported_cpuid;
628eddedc3SXiaoyao Li
631619d0e4SXiaoyao Li /* Valid after kvm_arch_init()->confidential_guest_kvm_init()->tdx_kvm_init() */
is_tdx_vm(void)641619d0e4SXiaoyao Li bool is_tdx_vm(void)
651619d0e4SXiaoyao Li {
661619d0e4SXiaoyao Li return !!tdx_guest;
671619d0e4SXiaoyao Li }
681619d0e4SXiaoyao Li
/* Selects which ioctl tdx_ioctl_internal() uses to send the command. */
enum tdx_ioctl_level {
    TDX_VM_IOCTL,       /* sent with kvm_vm_ioctl() */
    TDX_VCPU_IOCTL,     /* sent with kvm_vcpu_ioctl() */
};
738eddedc3SXiaoyao Li
tdx_ioctl_internal(enum tdx_ioctl_level level,void * state,int cmd_id,__u32 flags,void * data,Error ** errp)748eddedc3SXiaoyao Li static int tdx_ioctl_internal(enum tdx_ioctl_level level, void *state,
758eddedc3SXiaoyao Li int cmd_id, __u32 flags, void *data,
768eddedc3SXiaoyao Li Error **errp)
77631a2ac5SXiaoyao Li {
788eddedc3SXiaoyao Li struct kvm_tdx_cmd tdx_cmd = {};
798eddedc3SXiaoyao Li int r;
808eddedc3SXiaoyao Li
818eddedc3SXiaoyao Li const char *tdx_ioctl_name[] = {
828eddedc3SXiaoyao Li [KVM_TDX_CAPABILITIES] = "KVM_TDX_CAPABILITIES",
838eddedc3SXiaoyao Li [KVM_TDX_INIT_VM] = "KVM_TDX_INIT_VM",
848eddedc3SXiaoyao Li [KVM_TDX_INIT_VCPU] = "KVM_TDX_INIT_VCPU",
858eddedc3SXiaoyao Li [KVM_TDX_INIT_MEM_REGION] = "KVM_TDX_INIT_MEM_REGION",
868eddedc3SXiaoyao Li [KVM_TDX_FINALIZE_VM] = "KVM_TDX_FINALIZE_VM",
878eddedc3SXiaoyao Li [KVM_TDX_GET_CPUID] = "KVM_TDX_GET_CPUID",
888eddedc3SXiaoyao Li };
898eddedc3SXiaoyao Li
908eddedc3SXiaoyao Li tdx_cmd.id = cmd_id;
918eddedc3SXiaoyao Li tdx_cmd.flags = flags;
928eddedc3SXiaoyao Li tdx_cmd.data = (__u64)(unsigned long)data;
938eddedc3SXiaoyao Li
948eddedc3SXiaoyao Li switch (level) {
958eddedc3SXiaoyao Li case TDX_VM_IOCTL:
968eddedc3SXiaoyao Li r = kvm_vm_ioctl(kvm_state, KVM_MEMORY_ENCRYPT_OP, &tdx_cmd);
978eddedc3SXiaoyao Li break;
988eddedc3SXiaoyao Li case TDX_VCPU_IOCTL:
998eddedc3SXiaoyao Li r = kvm_vcpu_ioctl(state, KVM_MEMORY_ENCRYPT_OP, &tdx_cmd);
1008eddedc3SXiaoyao Li break;
1018eddedc3SXiaoyao Li default:
1028eddedc3SXiaoyao Li error_setg(errp, "Invalid tdx_ioctl_level %d", level);
1038eddedc3SXiaoyao Li return -EINVAL;
1048eddedc3SXiaoyao Li }
1058eddedc3SXiaoyao Li
1068eddedc3SXiaoyao Li if (r < 0) {
1078eddedc3SXiaoyao Li error_setg_errno(errp, -r, "TDX ioctl %s failed, hw_errors: 0x%llx",
1088eddedc3SXiaoyao Li tdx_ioctl_name[cmd_id], tdx_cmd.hw_error);
1098eddedc3SXiaoyao Li }
1108eddedc3SXiaoyao Li return r;
1118eddedc3SXiaoyao Li }
1128eddedc3SXiaoyao Li
tdx_vm_ioctl(int cmd_id,__u32 flags,void * data,Error ** errp)1138eddedc3SXiaoyao Li static inline int tdx_vm_ioctl(int cmd_id, __u32 flags, void *data,
1148eddedc3SXiaoyao Li Error **errp)
1158eddedc3SXiaoyao Li {
1168eddedc3SXiaoyao Li return tdx_ioctl_internal(TDX_VM_IOCTL, NULL, cmd_id, flags, data, errp);
1178eddedc3SXiaoyao Li }
1188eddedc3SXiaoyao Li
tdx_vcpu_ioctl(CPUState * cpu,int cmd_id,__u32 flags,void * data,Error ** errp)1198eddedc3SXiaoyao Li static inline int tdx_vcpu_ioctl(CPUState *cpu, int cmd_id, __u32 flags,
1208eddedc3SXiaoyao Li void *data, Error **errp)
1218eddedc3SXiaoyao Li {
1228eddedc3SXiaoyao Li return tdx_ioctl_internal(TDX_VCPU_IOCTL, cpu, cmd_id, flags, data, errp);
1238eddedc3SXiaoyao Li }
1248eddedc3SXiaoyao Li
get_tdx_capabilities(Error ** errp)1258eddedc3SXiaoyao Li static int get_tdx_capabilities(Error **errp)
1268eddedc3SXiaoyao Li {
1278eddedc3SXiaoyao Li struct kvm_tdx_capabilities *caps;
1288eddedc3SXiaoyao Li /* 1st generation of TDX reports 6 cpuid configs */
1298eddedc3SXiaoyao Li int nr_cpuid_configs = 6;
1308eddedc3SXiaoyao Li size_t size;
1318eddedc3SXiaoyao Li int r;
1328eddedc3SXiaoyao Li
1338eddedc3SXiaoyao Li do {
1348eddedc3SXiaoyao Li Error *local_err = NULL;
1358eddedc3SXiaoyao Li size = sizeof(struct kvm_tdx_capabilities) +
1368eddedc3SXiaoyao Li nr_cpuid_configs * sizeof(struct kvm_cpuid_entry2);
1378eddedc3SXiaoyao Li caps = g_malloc0(size);
1388eddedc3SXiaoyao Li caps->cpuid.nent = nr_cpuid_configs;
1398eddedc3SXiaoyao Li
1408eddedc3SXiaoyao Li r = tdx_vm_ioctl(KVM_TDX_CAPABILITIES, 0, caps, &local_err);
1418eddedc3SXiaoyao Li if (r == -E2BIG) {
1428eddedc3SXiaoyao Li g_free(caps);
1438eddedc3SXiaoyao Li nr_cpuid_configs *= 2;
1448eddedc3SXiaoyao Li if (nr_cpuid_configs > KVM_MAX_CPUID_ENTRIES) {
1458eddedc3SXiaoyao Li error_report("KVM TDX seems broken that number of CPUID entries"
1468eddedc3SXiaoyao Li " in kvm_tdx_capabilities exceeds limit: %d",
1478eddedc3SXiaoyao Li KVM_MAX_CPUID_ENTRIES);
1488eddedc3SXiaoyao Li error_propagate(errp, local_err);
1498eddedc3SXiaoyao Li return r;
1508eddedc3SXiaoyao Li }
1518eddedc3SXiaoyao Li error_free(local_err);
1528eddedc3SXiaoyao Li } else if (r < 0) {
1538eddedc3SXiaoyao Li g_free(caps);
1548eddedc3SXiaoyao Li error_propagate(errp, local_err);
1558eddedc3SXiaoyao Li return r;
1568eddedc3SXiaoyao Li }
1578eddedc3SXiaoyao Li } while (r == -E2BIG);
1588eddedc3SXiaoyao Li
1598eddedc3SXiaoyao Li tdx_caps = caps;
160631a2ac5SXiaoyao Li
161631a2ac5SXiaoyao Li return 0;
162631a2ac5SXiaoyao Li }
163631a2ac5SXiaoyao Li
/*
 * Record @tdvf_mr as the TDVF memory region of the TDX guest.
 * Asserts that no region has been recorded before.
 */
void tdx_set_tdvf_region(MemoryRegion *tdvf_mr)
{
    TdxGuest *tdx = tdx_guest;

    assert(!tdx->tdvf_mr);
    tdx->tdvf_mr = tdvf_mr;
}
1690dd5fe5eSChao Peng
tdx_get_hob_entry(TdxGuest * tdx)170a7314259SXiaoyao Li static TdxFirmwareEntry *tdx_get_hob_entry(TdxGuest *tdx)
171a7314259SXiaoyao Li {
172a7314259SXiaoyao Li TdxFirmwareEntry *entry;
173a7314259SXiaoyao Li
174a7314259SXiaoyao Li for_each_tdx_fw_entry(&tdx->tdvf, entry) {
175a7314259SXiaoyao Li if (entry->type == TDVF_SECTION_TYPE_TD_HOB) {
176a7314259SXiaoyao Li return entry;
177a7314259SXiaoyao Li }
178a7314259SXiaoyao Li }
179a7314259SXiaoyao Li error_report("TDVF metadata doesn't specify TD_HOB location.");
180a7314259SXiaoyao Li exit(1);
181a7314259SXiaoyao Li }
182a7314259SXiaoyao Li
tdx_add_ram_entry(uint64_t address,uint64_t length,enum TdxRamType type)183f18672e4SXiaoyao Li static void tdx_add_ram_entry(uint64_t address, uint64_t length,
184f18672e4SXiaoyao Li enum TdxRamType type)
185f18672e4SXiaoyao Li {
186f18672e4SXiaoyao Li uint32_t nr_entries = tdx_guest->nr_ram_entries;
187f18672e4SXiaoyao Li tdx_guest->ram_entries = g_renew(TdxRamEntry, tdx_guest->ram_entries,
188f18672e4SXiaoyao Li nr_entries + 1);
189f18672e4SXiaoyao Li
190f18672e4SXiaoyao Li tdx_guest->ram_entries[nr_entries].address = address;
191f18672e4SXiaoyao Li tdx_guest->ram_entries[nr_entries].length = length;
192f18672e4SXiaoyao Li tdx_guest->ram_entries[nr_entries].type = type;
193f18672e4SXiaoyao Li tdx_guest->nr_ram_entries++;
194f18672e4SXiaoyao Li }
195f18672e4SXiaoyao Li
tdx_accept_ram_range(uint64_t address,uint64_t length)196f18672e4SXiaoyao Li static int tdx_accept_ram_range(uint64_t address, uint64_t length)
197f18672e4SXiaoyao Li {
198f18672e4SXiaoyao Li uint64_t head_start, tail_start, head_length, tail_length;
199f18672e4SXiaoyao Li uint64_t tmp_address, tmp_length;
200f18672e4SXiaoyao Li TdxRamEntry *e;
201f18672e4SXiaoyao Li int i = 0;
202f18672e4SXiaoyao Li
203f18672e4SXiaoyao Li do {
204f18672e4SXiaoyao Li if (i == tdx_guest->nr_ram_entries) {
205f18672e4SXiaoyao Li return -1;
206f18672e4SXiaoyao Li }
207f18672e4SXiaoyao Li
208f18672e4SXiaoyao Li e = &tdx_guest->ram_entries[i++];
209f18672e4SXiaoyao Li } while (address + length <= e->address || address >= e->address + e->length);
210f18672e4SXiaoyao Li
211f18672e4SXiaoyao Li /*
212f18672e4SXiaoyao Li * The to-be-accepted ram range must be fully contained by one
213f18672e4SXiaoyao Li * RAM entry.
214f18672e4SXiaoyao Li */
215f18672e4SXiaoyao Li if (e->address > address ||
216f18672e4SXiaoyao Li e->address + e->length < address + length) {
217f18672e4SXiaoyao Li return -1;
218f18672e4SXiaoyao Li }
219f18672e4SXiaoyao Li
220f18672e4SXiaoyao Li if (e->type == TDX_RAM_ADDED) {
221f18672e4SXiaoyao Li return 0;
222f18672e4SXiaoyao Li }
223f18672e4SXiaoyao Li
224f18672e4SXiaoyao Li tmp_address = e->address;
225f18672e4SXiaoyao Li tmp_length = e->length;
226f18672e4SXiaoyao Li
227f18672e4SXiaoyao Li e->address = address;
228f18672e4SXiaoyao Li e->length = length;
229f18672e4SXiaoyao Li e->type = TDX_RAM_ADDED;
230f18672e4SXiaoyao Li
231f18672e4SXiaoyao Li head_length = address - tmp_address;
232f18672e4SXiaoyao Li if (head_length > 0) {
233f18672e4SXiaoyao Li head_start = tmp_address;
234f18672e4SXiaoyao Li tdx_add_ram_entry(head_start, head_length, TDX_RAM_UNACCEPTED);
235f18672e4SXiaoyao Li }
236f18672e4SXiaoyao Li
237f18672e4SXiaoyao Li tail_start = address + length;
238f18672e4SXiaoyao Li if (tail_start < tmp_address + tmp_length) {
239f18672e4SXiaoyao Li tail_length = tmp_address + tmp_length - tail_start;
240f18672e4SXiaoyao Li tdx_add_ram_entry(tail_start, tail_length, TDX_RAM_UNACCEPTED);
241f18672e4SXiaoyao Li }
242f18672e4SXiaoyao Li
243f18672e4SXiaoyao Li return 0;
244f18672e4SXiaoyao Li }
245f18672e4SXiaoyao Li
tdx_ram_entry_compare(const void * lhs_,const void * rhs_)246f18672e4SXiaoyao Li static int tdx_ram_entry_compare(const void *lhs_, const void* rhs_)
247f18672e4SXiaoyao Li {
248f18672e4SXiaoyao Li const TdxRamEntry *lhs = lhs_;
249f18672e4SXiaoyao Li const TdxRamEntry *rhs = rhs_;
250f18672e4SXiaoyao Li
251f18672e4SXiaoyao Li if (lhs->address == rhs->address) {
252f18672e4SXiaoyao Li return 0;
253f18672e4SXiaoyao Li }
254f18672e4SXiaoyao Li if (le64_to_cpu(lhs->address) > le64_to_cpu(rhs->address)) {
255f18672e4SXiaoyao Li return 1;
256f18672e4SXiaoyao Li }
257f18672e4SXiaoyao Li return -1;
258f18672e4SXiaoyao Li }
259f18672e4SXiaoyao Li
tdx_init_ram_entries(void)260f18672e4SXiaoyao Li static void tdx_init_ram_entries(void)
261f18672e4SXiaoyao Li {
262f18672e4SXiaoyao Li unsigned i, j, nr_e820_entries;
263f18672e4SXiaoyao Li
264f18672e4SXiaoyao Li nr_e820_entries = e820_get_table(NULL);
265f18672e4SXiaoyao Li tdx_guest->ram_entries = g_new(TdxRamEntry, nr_e820_entries);
266f18672e4SXiaoyao Li
267f18672e4SXiaoyao Li for (i = 0, j = 0; i < nr_e820_entries; i++) {
268f18672e4SXiaoyao Li uint64_t addr, len;
269f18672e4SXiaoyao Li
270f18672e4SXiaoyao Li if (e820_get_entry(i, E820_RAM, &addr, &len)) {
271f18672e4SXiaoyao Li tdx_guest->ram_entries[j].address = addr;
272f18672e4SXiaoyao Li tdx_guest->ram_entries[j].length = len;
273f18672e4SXiaoyao Li tdx_guest->ram_entries[j].type = TDX_RAM_UNACCEPTED;
274f18672e4SXiaoyao Li j++;
275f18672e4SXiaoyao Li }
276f18672e4SXiaoyao Li }
277f18672e4SXiaoyao Li tdx_guest->nr_ram_entries = j;
278f18672e4SXiaoyao Li }
279f18672e4SXiaoyao Li
tdx_post_init_vcpus(void)28041f7fd22SXiaoyao Li static void tdx_post_init_vcpus(void)
28141f7fd22SXiaoyao Li {
28241f7fd22SXiaoyao Li TdxFirmwareEntry *hob;
28341f7fd22SXiaoyao Li CPUState *cpu;
28441f7fd22SXiaoyao Li
28541f7fd22SXiaoyao Li hob = tdx_get_hob_entry(tdx_guest);
28641f7fd22SXiaoyao Li CPU_FOREACH(cpu) {
287*e7f926ebSCédric Le Goater tdx_vcpu_ioctl(cpu, KVM_TDX_INIT_VCPU, 0, (void *)(uintptr_t)hob->address,
28841f7fd22SXiaoyao Li &error_fatal);
28941f7fd22SXiaoyao Li }
29041f7fd22SXiaoyao Li }
29141f7fd22SXiaoyao Li
/*
 * Notifier callback that populates and finalizes the TD.
 *
 * Steps, in order:
 *   1. build the RAM entry list from e820;
 *   2. give every TDVF section a host buffer (BFV/CFV point into the loaded
 *      TDVF image, TD_HOB/TEMP_MEM get a fresh anonymous mapping and their
 *      guest range is marked as accepted);
 *   3. sort the RAM entries by address and create the TD HOB;
 *   4. initialize all vCPUs;
 *   5. copy every section into the guest with KVM_TDX_INIT_MEM_REGION;
 *   6. discard the now unused TDVF RAM block and issue KVM_TDX_FINALIZE_VM.
 *
 * Any failure terminates the process. @notifier and @unused are not used.
 */
static void tdx_finalize_vm(Notifier *notifier, void *unused)
{
    TdxFirmware *tdvf = &tdx_guest->tdvf;
    TdxFirmwareEntry *entry;
    RAMBlock *ram_block;
    Error *local_err = NULL;
    int r;

    tdx_init_ram_entries();

    for_each_tdx_fw_entry(tdvf, entry) {
        switch (entry->type) {
        case TDVF_SECTION_TYPE_BFV:
        case TDVF_SECTION_TYPE_CFV:
            /* Content comes straight from the loaded TDVF image. */
            entry->mem_ptr = tdvf->mem_ptr + entry->data_offset;
            break;
        case TDVF_SECTION_TYPE_TD_HOB:
        case TDVF_SECTION_TYPE_TEMP_MEM:
            /* No image content: use a temporary anonymous mapping. */
            entry->mem_ptr = qemu_ram_mmap(-1, entry->size,
                                           qemu_real_host_page_size(), 0, 0);
            if (entry->mem_ptr == MAP_FAILED) {
                error_report("Failed to mmap memory for TDVF section %d",
                             entry->type);
                exit(1);
            }
            if (tdx_accept_ram_range(entry->address, entry->size)) {
                error_report("Failed to accept memory for TDVF section %d",
                             entry->type);
                qemu_ram_munmap(-1, entry->mem_ptr, entry->size);
                exit(1);
            }
            break;
        default:
            error_report("Unsupported TDVF section %d", entry->type);
            exit(1);
        }
    }

    /* tdx_accept_ram_range() appends entries; restore address order. */
    qsort(tdx_guest->ram_entries, tdx_guest->nr_ram_entries,
          sizeof(TdxRamEntry), &tdx_ram_entry_compare);

    tdvf_hob_create(tdx_guest, tdx_get_hob_entry(tdx_guest));

    tdx_post_init_vcpus();

    for_each_tdx_fw_entry(tdvf, entry) {
        struct kvm_tdx_init_mem_region region;
        uint32_t flags;

        region = (struct kvm_tdx_init_mem_region) {
            .source_addr = (uintptr_t)entry->mem_ptr,
            .gpa = entry->address,
            .nr_pages = entry->size >> 12,
        };

        flags = entry->attributes & TDVF_SECTION_ATTRIBUTES_MR_EXTEND ?
                KVM_TDX_MEASURE_MEMORY_REGION : 0;

        /* Retry for as long as the ioctl is merely interrupted or busy. */
        do {
            error_free(local_err);
            local_err = NULL;
            r = tdx_vcpu_ioctl(first_cpu, KVM_TDX_INIT_MEM_REGION, flags,
                               &region, &local_err);
        } while (r == -EAGAIN || r == -EINTR);
        if (r < 0) {
            error_report_err(local_err);
            exit(1);
        }

        /* The temporary mappings created above are no longer needed. */
        if (entry->type == TDVF_SECTION_TYPE_TD_HOB ||
            entry->type == TDVF_SECTION_TYPE_TEMP_MEM) {
            qemu_ram_munmap(-1, entry->mem_ptr, entry->size);
            entry->mem_ptr = NULL;
        }
    }

    /*
     * TDVF image has been copied into private region above via
     * KVM_TDX_INIT_MEM_REGION. It becomes useless.
     */
    ram_block = tdx_guest->tdvf_mr->ram_block;
    ram_block_discard_range(ram_block, 0, ram_block->max_length);

    tdx_vm_ioctl(KVM_TDX_FINALIZE_VM, 0, NULL, &error_fatal);
    CONFIDENTIAL_GUEST_SUPPORT(tdx_guest)->ready = true;
}
3784420ba0eSXiaoyao Li
3794420ba0eSXiaoyao Li static Notifier tdx_machine_done_notify = {
3804420ba0eSXiaoyao Li .notify = tdx_finalize_vm,
3814420ba0eSXiaoyao Li };
3824420ba0eSXiaoyao Li
3830ba06e46SXiaoyao Li /*
3840ba06e46SXiaoyao Li * Some CPUID bits change from fixed1 to configurable bits when TDX module
3850ba06e46SXiaoyao Li * supports TDX_FEATURES0.VE_REDUCTION. e.g., MCA/MCE/MTRR/CORE_CAPABILITY.
3860ba06e46SXiaoyao Li *
3870ba06e46SXiaoyao Li * To make QEMU work with all the versions of TDX module, keep the fixed1 bits
3880ba06e46SXiaoyao Li * here if they are ever fixed1 bits in any of the version though not fixed1 in
3890ba06e46SXiaoyao Li * the latest version. Otherwise, with the older version of TDX module, QEMU may
3900ba06e46SXiaoyao Li * treat the fixed1 bit as unsupported.
3910ba06e46SXiaoyao Li *
3920ba06e46SXiaoyao Li * For newer TDX module, it does no harm to keep them in tdx_fixed1_bits even
3930ba06e46SXiaoyao Li * though they changed to configurable bits. Because tdx_fixed1_bits is used to
3940ba06e46SXiaoyao Li * setup the supported bits.
3950ba06e46SXiaoyao Li */
3960ba06e46SXiaoyao Li KvmCpuidInfo tdx_fixed1_bits = {
3970ba06e46SXiaoyao Li .cpuid.nent = 8,
3980ba06e46SXiaoyao Li .entries[0] = {
3990ba06e46SXiaoyao Li .function = 0x1,
4000ba06e46SXiaoyao Li .index = 0,
4010ba06e46SXiaoyao Li .ecx = CPUID_EXT_SSE3 | CPUID_EXT_PCLMULQDQ | CPUID_EXT_DTES64 |
4020ba06e46SXiaoyao Li CPUID_EXT_DSCPL | CPUID_EXT_SSSE3 | CPUID_EXT_CX16 |
4030ba06e46SXiaoyao Li CPUID_EXT_PDCM | CPUID_EXT_PCID | CPUID_EXT_SSE41 |
4040ba06e46SXiaoyao Li CPUID_EXT_SSE42 | CPUID_EXT_X2APIC | CPUID_EXT_MOVBE |
4050ba06e46SXiaoyao Li CPUID_EXT_POPCNT | CPUID_EXT_AES | CPUID_EXT_XSAVE |
4060ba06e46SXiaoyao Li CPUID_EXT_RDRAND | CPUID_EXT_HYPERVISOR,
4070ba06e46SXiaoyao Li .edx = CPUID_FP87 | CPUID_VME | CPUID_DE | CPUID_PSE | CPUID_TSC |
4080ba06e46SXiaoyao Li CPUID_MSR | CPUID_PAE | CPUID_MCE | CPUID_CX8 | CPUID_APIC |
4090ba06e46SXiaoyao Li CPUID_SEP | CPUID_MTRR | CPUID_PGE | CPUID_MCA | CPUID_CMOV |
4100ba06e46SXiaoyao Li CPUID_PAT | CPUID_CLFLUSH | CPUID_DTS | CPUID_MMX | CPUID_FXSR |
4110ba06e46SXiaoyao Li CPUID_SSE | CPUID_SSE2,
4120ba06e46SXiaoyao Li },
4130ba06e46SXiaoyao Li .entries[1] = {
4140ba06e46SXiaoyao Li .function = 0x6,
4150ba06e46SXiaoyao Li .index = 0,
4160ba06e46SXiaoyao Li .eax = CPUID_6_EAX_ARAT,
4170ba06e46SXiaoyao Li },
4180ba06e46SXiaoyao Li .entries[2] = {
4190ba06e46SXiaoyao Li .function = 0x7,
4200ba06e46SXiaoyao Li .index = 0,
4210ba06e46SXiaoyao Li .flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX,
4220ba06e46SXiaoyao Li .ebx = CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_FDP_EXCPTN_ONLY |
4230ba06e46SXiaoyao Li CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_INVPCID |
4240ba06e46SXiaoyao Li CPUID_7_0_EBX_ZERO_FCS_FDS | CPUID_7_0_EBX_RDSEED |
4250ba06e46SXiaoyao Li CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_CLFLUSHOPT |
4260ba06e46SXiaoyao Li CPUID_7_0_EBX_CLWB | CPUID_7_0_EBX_SHA_NI,
4270ba06e46SXiaoyao Li .ecx = CPUID_7_0_ECX_BUS_LOCK_DETECT | CPUID_7_0_ECX_MOVDIRI |
4280ba06e46SXiaoyao Li CPUID_7_0_ECX_MOVDIR64B,
4290ba06e46SXiaoyao Li .edx = CPUID_7_0_EDX_MD_CLEAR | CPUID_7_0_EDX_SPEC_CTRL |
4300ba06e46SXiaoyao Li CPUID_7_0_EDX_STIBP | CPUID_7_0_EDX_FLUSH_L1D |
4310ba06e46SXiaoyao Li CPUID_7_0_EDX_ARCH_CAPABILITIES | CPUID_7_0_EDX_CORE_CAPABILITY |
4320ba06e46SXiaoyao Li CPUID_7_0_EDX_SPEC_CTRL_SSBD,
4330ba06e46SXiaoyao Li },
4340ba06e46SXiaoyao Li .entries[3] = {
4350ba06e46SXiaoyao Li .function = 0x7,
4360ba06e46SXiaoyao Li .index = 2,
4370ba06e46SXiaoyao Li .flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX,
4380ba06e46SXiaoyao Li .edx = CPUID_7_2_EDX_PSFD | CPUID_7_2_EDX_IPRED_CTRL |
4390ba06e46SXiaoyao Li CPUID_7_2_EDX_RRSBA_CTRL | CPUID_7_2_EDX_BHI_CTRL,
4400ba06e46SXiaoyao Li },
4410ba06e46SXiaoyao Li .entries[4] = {
4420ba06e46SXiaoyao Li .function = 0xD,
4430ba06e46SXiaoyao Li .index = 0,
4440ba06e46SXiaoyao Li .flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX,
4450ba06e46SXiaoyao Li .eax = XSTATE_FP_MASK | XSTATE_SSE_MASK,
4460ba06e46SXiaoyao Li },
4470ba06e46SXiaoyao Li .entries[5] = {
4480ba06e46SXiaoyao Li .function = 0xD,
4490ba06e46SXiaoyao Li .index = 1,
4500ba06e46SXiaoyao Li .flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX,
4510ba06e46SXiaoyao Li .eax = CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC|
4520ba06e46SXiaoyao Li CPUID_XSAVE_XGETBV1 | CPUID_XSAVE_XSAVES,
4530ba06e46SXiaoyao Li },
4540ba06e46SXiaoyao Li .entries[6] = {
4550ba06e46SXiaoyao Li .function = 0x80000001,
4560ba06e46SXiaoyao Li .index = 0,
4570ba06e46SXiaoyao Li .ecx = CPUID_EXT3_LAHF_LM | CPUID_EXT3_ABM | CPUID_EXT3_3DNOWPREFETCH,
4580ba06e46SXiaoyao Li /*
4590ba06e46SXiaoyao Li * Strictly speaking, SYSCALL is not fixed1 bit since it depends on
4600ba06e46SXiaoyao Li * the CPU to be in 64-bit mode. But here fixed1 is used to serve the
4610ba06e46SXiaoyao Li * purpose of supported bits for TDX. In this sense, SYACALL is always
4620ba06e46SXiaoyao Li * supported.
4630ba06e46SXiaoyao Li */
4640ba06e46SXiaoyao Li .edx = CPUID_EXT2_SYSCALL | CPUID_EXT2_NX | CPUID_EXT2_PDPE1GB |
4650ba06e46SXiaoyao Li CPUID_EXT2_RDTSCP | CPUID_EXT2_LM,
4660ba06e46SXiaoyao Li },
4670ba06e46SXiaoyao Li .entries[7] = {
4680ba06e46SXiaoyao Li .function = 0x80000007,
4690ba06e46SXiaoyao Li .index = 0,
4700ba06e46SXiaoyao Li .edx = CPUID_APM_INVTSC,
4710ba06e46SXiaoyao Li },
4720ba06e46SXiaoyao Li };
4730ba06e46SXiaoyao Li
/* Associates one TD attribute bit with the CPUID feature bits it enables. */
typedef struct TdxAttrsMap {
    uint32_t attr_index;    /* bit position within the supported attributes */
    uint32_t cpuid_leaf;    /* CPUID function */
    uint32_t cpuid_subleaf; /* CPUID index */
    int cpuid_reg;          /* R_EAX, R_EBX, R_ECX or R_EDX */
    uint32_t feat_mask;     /* bits OR-ed into that register */
} TdxAttrsMap;
48131df29c5SXiaoyao Li
48231df29c5SXiaoyao Li static TdxAttrsMap tdx_attrs_maps[] = {
48331df29c5SXiaoyao Li {.attr_index = 27,
48431df29c5SXiaoyao Li .cpuid_leaf = 7,
48531df29c5SXiaoyao Li .cpuid_subleaf = 1,
48631df29c5SXiaoyao Li .cpuid_reg = R_EAX,
48731df29c5SXiaoyao Li .feat_mask = CPUID_7_1_EAX_LASS,},
48831df29c5SXiaoyao Li
48931df29c5SXiaoyao Li {.attr_index = 30,
49031df29c5SXiaoyao Li .cpuid_leaf = 7,
49131df29c5SXiaoyao Li .cpuid_subleaf = 0,
49231df29c5SXiaoyao Li .cpuid_reg = R_ECX,
49331df29c5SXiaoyao Li .feat_mask = CPUID_7_0_ECX_PKS,},
49431df29c5SXiaoyao Li
49531df29c5SXiaoyao Li {.attr_index = 31,
49631df29c5SXiaoyao Li .cpuid_leaf = 7,
49731df29c5SXiaoyao Li .cpuid_subleaf = 0,
49831df29c5SXiaoyao Li .cpuid_reg = R_ECX,
49931df29c5SXiaoyao Li .feat_mask = CPUID_7_0_ECX_KeyLocker,},
50031df29c5SXiaoyao Li };
50131df29c5SXiaoyao Li
5028c94c84cSXiaoyao Li typedef struct TdxXFAMDep {
5038c94c84cSXiaoyao Li int xfam_bit;
5048c94c84cSXiaoyao Li FeatureMask feat_mask;
5058c94c84cSXiaoyao Li } TdxXFAMDep;
5068c94c84cSXiaoyao Li
5078c94c84cSXiaoyao Li /*
5088c94c84cSXiaoyao Li * Note, only the CPUID bits whose virtualization type are "XFAM & Native" are
5098c94c84cSXiaoyao Li * defiend here.
5108c94c84cSXiaoyao Li *
5118c94c84cSXiaoyao Li * For those whose virtualization type are "XFAM & Configured & Native", they
5128c94c84cSXiaoyao Li * are reported as configurable bits. And they are not supported if not in the
5138c94c84cSXiaoyao Li * configureable bits list from KVM even if the corresponding XFAM bit is
5148c94c84cSXiaoyao Li * supported.
5158c94c84cSXiaoyao Li */
5168c94c84cSXiaoyao Li TdxXFAMDep tdx_xfam_deps[] = {
5178c94c84cSXiaoyao Li { XSTATE_YMM_BIT, { FEAT_1_ECX, CPUID_EXT_FMA }},
5188c94c84cSXiaoyao Li { XSTATE_YMM_BIT, { FEAT_7_0_EBX, CPUID_7_0_EBX_AVX2 }},
5198c94c84cSXiaoyao Li { XSTATE_OPMASK_BIT, { FEAT_7_0_ECX, CPUID_7_0_ECX_AVX512_VBMI}},
5208c94c84cSXiaoyao Li { XSTATE_OPMASK_BIT, { FEAT_7_0_EDX, CPUID_7_0_EDX_AVX512_FP16}},
5218c94c84cSXiaoyao Li { XSTATE_PT_BIT, { FEAT_7_0_EBX, CPUID_7_0_EBX_INTEL_PT}},
5228c94c84cSXiaoyao Li { XSTATE_PKRU_BIT, { FEAT_7_0_ECX, CPUID_7_0_ECX_PKU}},
5238c94c84cSXiaoyao Li { XSTATE_XTILE_CFG_BIT, { FEAT_7_0_EDX, CPUID_7_0_EDX_AMX_BF16 }},
5248c94c84cSXiaoyao Li { XSTATE_XTILE_CFG_BIT, { FEAT_7_0_EDX, CPUID_7_0_EDX_AMX_TILE }},
5258c94c84cSXiaoyao Li { XSTATE_XTILE_CFG_BIT, { FEAT_7_0_EDX, CPUID_7_0_EDX_AMX_INT8 }},
5268c94c84cSXiaoyao Li };
5278c94c84cSXiaoyao Li
find_in_supported_entry(uint32_t function,uint32_t index)5280ba06e46SXiaoyao Li static struct kvm_cpuid_entry2 *find_in_supported_entry(uint32_t function,
5290ba06e46SXiaoyao Li uint32_t index)
5300ba06e46SXiaoyao Li {
5310ba06e46SXiaoyao Li struct kvm_cpuid_entry2 *e;
5320ba06e46SXiaoyao Li
5330ba06e46SXiaoyao Li e = cpuid_find_entry(tdx_supported_cpuid, function, index);
5340ba06e46SXiaoyao Li if (!e) {
5350ba06e46SXiaoyao Li if (tdx_supported_cpuid->nent >= KVM_MAX_CPUID_ENTRIES) {
5360ba06e46SXiaoyao Li error_report("tdx_supported_cpuid requries more space than %d entries",
5370ba06e46SXiaoyao Li KVM_MAX_CPUID_ENTRIES);
5380ba06e46SXiaoyao Li exit(1);
5390ba06e46SXiaoyao Li }
5400ba06e46SXiaoyao Li e = &tdx_supported_cpuid->entries[tdx_supported_cpuid->nent++];
5410ba06e46SXiaoyao Li e->function = function;
5420ba06e46SXiaoyao Li e->index = index;
5430ba06e46SXiaoyao Li }
5440ba06e46SXiaoyao Li
5450ba06e46SXiaoyao Li return e;
5460ba06e46SXiaoyao Li }
5470ba06e46SXiaoyao Li
tdx_add_supported_cpuid_by_fixed1_bits(void)5480ba06e46SXiaoyao Li static void tdx_add_supported_cpuid_by_fixed1_bits(void)
5490ba06e46SXiaoyao Li {
5500ba06e46SXiaoyao Li struct kvm_cpuid_entry2 *e, *e1;
5510ba06e46SXiaoyao Li int i;
5520ba06e46SXiaoyao Li
5530ba06e46SXiaoyao Li for (i = 0; i < tdx_fixed1_bits.cpuid.nent; i++) {
5540ba06e46SXiaoyao Li e = &tdx_fixed1_bits.entries[i];
5550ba06e46SXiaoyao Li
5560ba06e46SXiaoyao Li e1 = find_in_supported_entry(e->function, e->index);
5570ba06e46SXiaoyao Li e1->eax |= e->eax;
5580ba06e46SXiaoyao Li e1->ebx |= e->ebx;
5590ba06e46SXiaoyao Li e1->ecx |= e->ecx;
5600ba06e46SXiaoyao Li e1->edx |= e->edx;
5610ba06e46SXiaoyao Li }
5620ba06e46SXiaoyao Li }
5630ba06e46SXiaoyao Li
tdx_add_supported_cpuid_by_attrs(void)56431df29c5SXiaoyao Li static void tdx_add_supported_cpuid_by_attrs(void)
56531df29c5SXiaoyao Li {
56631df29c5SXiaoyao Li struct kvm_cpuid_entry2 *e;
56731df29c5SXiaoyao Li TdxAttrsMap *map;
56831df29c5SXiaoyao Li int i;
56931df29c5SXiaoyao Li
57031df29c5SXiaoyao Li for (i = 0; i < ARRAY_SIZE(tdx_attrs_maps); i++) {
57131df29c5SXiaoyao Li map = &tdx_attrs_maps[i];
57231df29c5SXiaoyao Li if (!((1ULL << map->attr_index) & tdx_caps->supported_attrs)) {
57331df29c5SXiaoyao Li continue;
57431df29c5SXiaoyao Li }
57531df29c5SXiaoyao Li
57631df29c5SXiaoyao Li e = find_in_supported_entry(map->cpuid_leaf, map->cpuid_subleaf);
57731df29c5SXiaoyao Li
57831df29c5SXiaoyao Li switch(map->cpuid_reg) {
57931df29c5SXiaoyao Li case R_EAX:
58031df29c5SXiaoyao Li e->eax |= map->feat_mask;
58131df29c5SXiaoyao Li break;
58231df29c5SXiaoyao Li case R_EBX:
58331df29c5SXiaoyao Li e->ebx |= map->feat_mask;
58431df29c5SXiaoyao Li break;
58531df29c5SXiaoyao Li case R_ECX:
58631df29c5SXiaoyao Li e->ecx |= map->feat_mask;
58731df29c5SXiaoyao Li break;
58831df29c5SXiaoyao Li case R_EDX:
58931df29c5SXiaoyao Li e->edx |= map->feat_mask;
59031df29c5SXiaoyao Li break;
59131df29c5SXiaoyao Li }
59231df29c5SXiaoyao Li }
59331df29c5SXiaoyao Li }
59431df29c5SXiaoyao Li
tdx_add_supported_cpuid_by_xfam(void)5958c94c84cSXiaoyao Li static void tdx_add_supported_cpuid_by_xfam(void)
5968c94c84cSXiaoyao Li {
5978c94c84cSXiaoyao Li struct kvm_cpuid_entry2 *e;
5988c94c84cSXiaoyao Li int i;
5998c94c84cSXiaoyao Li
6008c94c84cSXiaoyao Li const TdxXFAMDep *xfam_dep;
6018c94c84cSXiaoyao Li const FeatureWordInfo *f;
6028c94c84cSXiaoyao Li for (i = 0; i < ARRAY_SIZE(tdx_xfam_deps); i++) {
6038c94c84cSXiaoyao Li xfam_dep = &tdx_xfam_deps[i];
6048c94c84cSXiaoyao Li if (!((1ULL << xfam_dep->xfam_bit) & tdx_caps->supported_xfam)) {
6058c94c84cSXiaoyao Li continue;
6068c94c84cSXiaoyao Li }
6078c94c84cSXiaoyao Li
6088c94c84cSXiaoyao Li f = &feature_word_info[xfam_dep->feat_mask.index];
6098c94c84cSXiaoyao Li if (f->type != CPUID_FEATURE_WORD) {
6108c94c84cSXiaoyao Li continue;
6118c94c84cSXiaoyao Li }
6128c94c84cSXiaoyao Li
6138c94c84cSXiaoyao Li e = find_in_supported_entry(f->cpuid.eax, f->cpuid.ecx);
6148c94c84cSXiaoyao Li switch(f->cpuid.reg) {
6158c94c84cSXiaoyao Li case R_EAX:
6168c94c84cSXiaoyao Li e->eax |= xfam_dep->feat_mask.mask;
6178c94c84cSXiaoyao Li break;
6188c94c84cSXiaoyao Li case R_EBX:
6198c94c84cSXiaoyao Li e->ebx |= xfam_dep->feat_mask.mask;
6208c94c84cSXiaoyao Li break;
6218c94c84cSXiaoyao Li case R_ECX:
6228c94c84cSXiaoyao Li e->ecx |= xfam_dep->feat_mask.mask;
6238c94c84cSXiaoyao Li break;
6248c94c84cSXiaoyao Li case R_EDX:
6258c94c84cSXiaoyao Li e->edx |= xfam_dep->feat_mask.mask;
6268c94c84cSXiaoyao Li break;
6278c94c84cSXiaoyao Li }
6288c94c84cSXiaoyao Li }
6298c94c84cSXiaoyao Li
6308c94c84cSXiaoyao Li e = find_in_supported_entry(0xd, 0);
6318c94c84cSXiaoyao Li e->eax |= (tdx_caps->supported_xfam & CPUID_XSTATE_XCR0_MASK);
6328c94c84cSXiaoyao Li e->edx |= (tdx_caps->supported_xfam & CPUID_XSTATE_XCR0_MASK) >> 32;
6338c94c84cSXiaoyao Li
6348c94c84cSXiaoyao Li e = find_in_supported_entry(0xd, 1);
6359f5771c5SXiaoyao Li /*
6369f5771c5SXiaoyao Li * Mark XFD always support for TDX, it will be cleared finally in
6379f5771c5SXiaoyao Li * tdx_adjust_cpuid_features() if XFD is unavailable on the hardware
6389f5771c5SXiaoyao Li * because in this case the original data has it as 0.
6399f5771c5SXiaoyao Li */
6409f5771c5SXiaoyao Li e->eax |= CPUID_XSAVE_XFD;
6418c94c84cSXiaoyao Li e->ecx |= (tdx_caps->supported_xfam & CPUID_XSTATE_XSS_MASK);
6428c94c84cSXiaoyao Li e->edx |= (tdx_caps->supported_xfam & CPUID_XSTATE_XSS_MASK) >> 32;
6438c94c84cSXiaoyao Li }
6448c94c84cSXiaoyao Li
tdx_add_supported_kvm_features(void)6454d6e288aSXiaoyao Li static void tdx_add_supported_kvm_features(void)
6464d6e288aSXiaoyao Li {
6474d6e288aSXiaoyao Li struct kvm_cpuid_entry2 *e;
6484d6e288aSXiaoyao Li
6494d6e288aSXiaoyao Li e = find_in_supported_entry(0x40000001, 0);
6504d6e288aSXiaoyao Li e->eax = TDX_SUPPORTED_KVM_FEATURES;
6514d6e288aSXiaoyao Li }
6524d6e288aSXiaoyao Li
tdx_setup_supported_cpuid(void)65375ec6189SXiaoyao Li static void tdx_setup_supported_cpuid(void)
65475ec6189SXiaoyao Li {
65575ec6189SXiaoyao Li if (tdx_supported_cpuid) {
65675ec6189SXiaoyao Li return;
65775ec6189SXiaoyao Li }
65875ec6189SXiaoyao Li
65975ec6189SXiaoyao Li tdx_supported_cpuid = g_malloc0(sizeof(*tdx_supported_cpuid) +
66075ec6189SXiaoyao Li KVM_MAX_CPUID_ENTRIES * sizeof(struct kvm_cpuid_entry2));
66175ec6189SXiaoyao Li
66275ec6189SXiaoyao Li memcpy(tdx_supported_cpuid->entries, tdx_caps->cpuid.entries,
66375ec6189SXiaoyao Li tdx_caps->cpuid.nent * sizeof(struct kvm_cpuid_entry2));
66475ec6189SXiaoyao Li tdx_supported_cpuid->nent = tdx_caps->cpuid.nent;
6650ba06e46SXiaoyao Li
6660ba06e46SXiaoyao Li tdx_add_supported_cpuid_by_fixed1_bits();
66731df29c5SXiaoyao Li tdx_add_supported_cpuid_by_attrs();
6688c94c84cSXiaoyao Li tdx_add_supported_cpuid_by_xfam();
6694d6e288aSXiaoyao Li
6704d6e288aSXiaoyao Li tdx_add_supported_kvm_features();
67175ec6189SXiaoyao Li }
67275ec6189SXiaoyao Li
tdx_kvm_init(ConfidentialGuestSupport * cgs,Error ** errp)6738eddedc3SXiaoyao Li static int tdx_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
6748eddedc3SXiaoyao Li {
675810d4e83SXiaoyao Li MachineState *ms = MACHINE(qdev_get_machine());
676810d4e83SXiaoyao Li X86MachineState *x86ms = X86_MACHINE(ms);
6771619d0e4SXiaoyao Li TdxGuest *tdx = TDX_GUEST(cgs);
6788eddedc3SXiaoyao Li int r = 0;
6798eddedc3SXiaoyao Li
6808eddedc3SXiaoyao Li kvm_mark_guest_state_protected();
6818eddedc3SXiaoyao Li
682810d4e83SXiaoyao Li if (x86ms->smm == ON_OFF_AUTO_AUTO) {
683810d4e83SXiaoyao Li x86ms->smm = ON_OFF_AUTO_OFF;
684810d4e83SXiaoyao Li } else if (x86ms->smm == ON_OFF_AUTO_ON) {
685810d4e83SXiaoyao Li error_setg(errp, "TDX VM doesn't support SMM");
686810d4e83SXiaoyao Li return -EINVAL;
687810d4e83SXiaoyao Li }
688810d4e83SXiaoyao Li
689e7ef6089SXiaoyao Li if (x86ms->pic == ON_OFF_AUTO_AUTO) {
690e7ef6089SXiaoyao Li x86ms->pic = ON_OFF_AUTO_OFF;
691e7ef6089SXiaoyao Li } else if (x86ms->pic == ON_OFF_AUTO_ON) {
692e7ef6089SXiaoyao Li error_setg(errp, "TDX VM doesn't support PIC");
693e7ef6089SXiaoyao Li return -EINVAL;
694e7ef6089SXiaoyao Li }
695e7ef6089SXiaoyao Li
696bb45580dSXiaoyao Li if (kvm_state->kernel_irqchip_split == ON_OFF_AUTO_AUTO) {
697bb45580dSXiaoyao Li kvm_state->kernel_irqchip_split = ON_OFF_AUTO_ON;
698bb45580dSXiaoyao Li } else if (kvm_state->kernel_irqchip_split != ON_OFF_AUTO_ON) {
699bb45580dSXiaoyao Li error_setg(errp, "TDX VM requires kernel_irqchip to be split");
700bb45580dSXiaoyao Li return -EINVAL;
701bb45580dSXiaoyao Li }
702bb45580dSXiaoyao Li
7038eddedc3SXiaoyao Li if (!tdx_caps) {
7048eddedc3SXiaoyao Li r = get_tdx_capabilities(errp);
7051619d0e4SXiaoyao Li if (r) {
7061619d0e4SXiaoyao Li return r;
7071619d0e4SXiaoyao Li }
7088eddedc3SXiaoyao Li }
7098eddedc3SXiaoyao Li
71075ec6189SXiaoyao Li tdx_setup_supported_cpuid();
71175ec6189SXiaoyao Li
7121ff5048dSXiaoyao Li /* TDX relies on KVM_HC_MAP_GPA_RANGE to handle TDG.VP.VMCALL<MapGPA> */
7131ff5048dSXiaoyao Li if (!kvm_enable_hypercall(BIT_ULL(KVM_HC_MAP_GPA_RANGE))) {
7141ff5048dSXiaoyao Li return -EOPNOTSUPP;
7151ff5048dSXiaoyao Li }
7161ff5048dSXiaoyao Li
717da672865SXiaoyao Li /*
718da672865SXiaoyao Li * Set kvm_readonly_mem_allowed to false, because TDX only supports readonly
719da672865SXiaoyao Li * memory for shared memory but not for private memory. Besides, whether a
720da672865SXiaoyao Li * memslot is private or shared is not determined by QEMU.
721da672865SXiaoyao Li *
722da672865SXiaoyao Li * Thus, just mark readonly memory not supported for simplicity.
723da672865SXiaoyao Li */
724da672865SXiaoyao Li kvm_readonly_mem_allowed = false;
725da672865SXiaoyao Li
7264420ba0eSXiaoyao Li qemu_add_machine_init_done_notifier(&tdx_machine_done_notify);
7274420ba0eSXiaoyao Li
7281619d0e4SXiaoyao Li tdx_guest = tdx;
7291619d0e4SXiaoyao Li return 0;
7308eddedc3SXiaoyao Li }
7318eddedc3SXiaoyao Li
/*
 * X86ConfidentialGuest kvm_type hook: always reports KVM_X86_TDX_VM.
 */
static int tdx_kvm_type(X86ConfidentialGuest *cg)
{
    /* The cast is performed only as an object type check; result unused. */
    (void)TDX_GUEST(cg);

    return KVM_X86_TDX_VM;
}
739b455880eSXiaoyao Li
/*
 * X86ConfidentialGuest cpu_instance_init hook.
 *
 * Applies the TDX defaults to a freshly created vCPU object: the "pmu"
 * property is turned off, "invtsc" is turned on, and CPUID leaf 0x1f is
 * enabled.  Property failures abort via error_abort.
 */
static void tdx_cpu_instance_init(X86ConfidentialGuest *cg, CPUState *cpu)
{
    X86CPU *vcpu = X86_CPU(cpu);
    Object *cpu_obj = OBJECT(cpu);

    object_property_set_bool(cpu_obj, "pmu", false, &error_abort);

    /* invtsc is fixed1 for TD guest */
    object_property_set_bool(cpu_obj, "invtsc", true, &error_abort);

    vcpu->enable_cpuid_0x1f = true;
}
7517c615242SXiaoyao Li
/*
 * X86ConfidentialGuest adjust_cpuid_features hook.
 *
 * @feature, @index: CPUID leaf and subleaf being queried
 * @reg:             which register of that leaf @value belongs to
 * @value:           the register value to adjust
 *
 * First ORs in the matching register of the tdx_fixed1_bits entry, if there
 * is one.  Then, only when (feature, index, reg) is a feature word, masks the
 * result with the matching register of the tdx_supported_cpuid entry, if
 * there is one.  Returns the adjusted value.
 */
static uint32_t tdx_adjust_cpuid_features(X86ConfidentialGuest *cg,
                                          uint32_t feature, uint32_t index,
                                          int reg, uint32_t value)
{
    struct kvm_cpuid_entry2 *fixed1;
    struct kvm_cpuid_entry2 *supported;

    fixed1 = cpuid_find_entry(&tdx_fixed1_bits.cpuid, feature, index);
    if (fixed1 != NULL) {
        value |= cpuid_entry_get_reg(fixed1, reg);
    }

    if (!is_feature_word_cpuid(feature, index, reg)) {
        return value;
    }

    supported = cpuid_find_entry(tdx_supported_cpuid, feature, index);
    if (supported != NULL) {
        value &= cpuid_entry_get_reg(supported, reg);
    }

    return value;
}
77275ec6189SXiaoyao Li
/*
 * Fetch the vCPU's CPUID table through the KVM_TDX_GET_CPUID vCPU ioctl.
 *
 * The first attempt uses room for KVM_MAX_CPUID_ENTRIES entries.  Whenever the
 * ioctl fails with -E2BIG the buffer is reallocated using the entry count
 * found in the buffer's nent field (presumably updated by KVM to the required
 * count -- confirm against the KVM TDX ABI) and the call is retried.
 *
 * @cpu: vCPU to query
 * @ret: receives the negative error code when NULL is returned; it is left
 *       untouched on success
 *
 * Returns a g_malloc'ed table the caller must g_free(), or NULL on failure
 * (the error is reported via error_report_err()).
 */
static struct kvm_cpuid2 *tdx_fetch_cpuid(CPUState *cpu, int *ret)
{
    struct kvm_cpuid2 *fetch_cpuid;
    int size = KVM_MAX_CPUID_ENTRIES;
    Error *local_err = NULL;
    int r;

    do {
        /* Drop the error left over from a previous -E2BIG attempt */
        error_free(local_err);
        local_err = NULL;

        fetch_cpuid = g_malloc0(sizeof(*fetch_cpuid) +
                                sizeof(struct kvm_cpuid_entry2) * size);
        fetch_cpuid->nent = size;
        r = tdx_vcpu_ioctl(cpu, KVM_TDX_GET_CPUID, 0, fetch_cpuid, &local_err);
        if (r == -E2BIG) {
            /* Read the new size BEFORE freeing: avoids a use-after-free */
            size = fetch_cpuid->nent;
            g_free(fetch_cpuid);
        }
    } while (r == -E2BIG);

    if (r < 0) {
        error_report_err(local_err);
        /* The buffer of the failed attempt is not handed out; release it */
        g_free(fetch_cpuid);
        *ret = r;
        return NULL;
    }

    return fetch_cpuid;
}
802e3d1a4a6SXiaoyao Li
/*
 * X86ConfidentialGuest check_features hook.
 *
 * Compares the CPUID feature words requested for the vCPU (env->features[])
 * with the values returned by tdx_fetch_cpuid() and records, via
 * mark_unavailable_features() / mark_forced_on_features(), the bits that were
 * requested but are absent and the bits that are present but were not
 * requested.  The prefixes passed to those helpers are only non-NULL when
 * check_cpuid or enforce_cpuid is set.  MSR feature words are skipped, as are
 * CPUID words for which no entry was fetched.
 *
 * Returns 0 on success, the tdx_fetch_cpuid() error if the fetch failed, and
 * -EINVAL when enforce_cpuid is set and any mismatch was found or when the
 * guest physical address width differs from host_cpu_phys_bits().
 */
static int tdx_check_features(X86ConfidentialGuest *cg, CPUState *cs)
{
    uint64_t actual, requested, unavailable, forced_on;
    /*
     * g_autofree variables must be initialized at declaration so that the
     * cleanup handler never sees an indeterminate pointer.
     */
    g_autofree struct kvm_cpuid2 *fetch_cpuid = NULL;
    const char *forced_on_prefix = NULL;
    const char *unav_prefix = NULL;
    struct kvm_cpuid_entry2 *entry;
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;
    FeatureWordInfo *wi;
    FeatureWord w;
    bool mismatch = false;
    int r;

    fetch_cpuid = tdx_fetch_cpuid(cs, &r);
    if (!fetch_cpuid) {
        return r;
    }

    if (cpu->check_cpuid || cpu->enforce_cpuid) {
        unav_prefix = "TDX doesn't support requested feature";
        forced_on_prefix = "TDX forcibly sets the feature";
    }

    for (w = 0; w < FEATURE_WORDS; w++) {
        wi = &feature_word_info[w];
        actual = 0;

        switch (wi->type) {
        case CPUID_FEATURE_WORD:
            entry = cpuid_find_entry(fetch_cpuid, wi->cpuid.eax, wi->cpuid.ecx);
            if (!entry) {
                /*
                 * If KVM doesn't report it means it's totally configurable
                 * by QEMU
                 */
                continue;
            }

            actual = cpuid_entry_get_reg(entry, wi->cpuid.reg);
            break;
        case MSR_FEATURE_WORD:
            /*
             * TODO:
             * validate MSR features when KVM has interface report them.
             */
            continue;
        }

        /* Fixup for special cases */
        switch (w) {
        case FEAT_8000_0001_EDX:
            /*
             * Intel enumerates SYSCALL bit as 1 only when processor in 64-bit
             * mode and before vcpu running it's not in 64-bit mode.
             */
            actual |= CPUID_EXT2_SYSCALL;
            break;
        default:
            break;
        }

        requested = env->features[w];

        /* Requested by the user but not present in the fetched CPUID */
        unavailable = requested & ~actual;
        mark_unavailable_features(cpu, w, unavailable, unav_prefix);
        if (unavailable) {
            mismatch = true;
        }

        /* Present in the fetched CPUID but not requested by the user */
        forced_on = actual & ~requested;
        mark_forced_on_features(cpu, w, forced_on, forced_on_prefix);
        if (forced_on) {
            mismatch = true;
        }
    }

    if (cpu->enforce_cpuid && mismatch) {
        return -EINVAL;
    }

    if (cpu->phys_bits != host_cpu_phys_bits()) {
        error_report("TDX requires guest CPU physical bits (%u) "
                     "to match host CPU physical bits (%u)",
                     cpu->phys_bits, host_cpu_phys_bits());
        return -EINVAL;
    }

    return 0;
}
892e3d1a4a6SXiaoyao Li
tdx_validate_attributes(TdxGuest * tdx,Error ** errp)89353b6f406SXiaoyao Li static int tdx_validate_attributes(TdxGuest *tdx, Error **errp)
89453b6f406SXiaoyao Li {
89553b6f406SXiaoyao Li if ((tdx->attributes & ~tdx_caps->supported_attrs)) {
896*e7f926ebSCédric Le Goater error_setg(errp, "Invalid attributes 0x%"PRIx64" for TDX VM "
897*e7f926ebSCédric Le Goater "(KVM supported: 0x%"PRIx64")", tdx->attributes,
898*e7f926ebSCédric Le Goater (uint64_t)tdx_caps->supported_attrs);
89953b6f406SXiaoyao Li return -1;
90053b6f406SXiaoyao Li }
90153b6f406SXiaoyao Li
90253b6f406SXiaoyao Li if (tdx->attributes & ~TDX_SUPPORTED_TD_ATTRS) {
90353b6f406SXiaoyao Li error_setg(errp, "Some QEMU unsupported TD attribute bits being "
904*e7f926ebSCédric Le Goater "requested: 0x%"PRIx64" (QEMU supported: 0x%"PRIx64")",
905*e7f926ebSCédric Le Goater tdx->attributes, (uint64_t)TDX_SUPPORTED_TD_ATTRS);
90653b6f406SXiaoyao Li return -1;
90753b6f406SXiaoyao Li }
90853b6f406SXiaoyao Li
90953b6f406SXiaoyao Li return 0;
91053b6f406SXiaoyao Li }
91153b6f406SXiaoyao Li
setup_td_guest_attributes(X86CPU * x86cpu,Error ** errp)91253b6f406SXiaoyao Li static int setup_td_guest_attributes(X86CPU *x86cpu, Error **errp)
913bb3be394SXiaoyao Li {
914bb3be394SXiaoyao Li CPUX86State *env = &x86cpu->env;
915bb3be394SXiaoyao Li
916bb3be394SXiaoyao Li tdx_guest->attributes |= (env->features[FEAT_7_0_ECX] & CPUID_7_0_ECX_PKS) ?
917bb3be394SXiaoyao Li TDX_TD_ATTRIBUTES_PKS : 0;
918bb3be394SXiaoyao Li tdx_guest->attributes |= x86cpu->enable_pmu ? TDX_TD_ATTRIBUTES_PERFMON : 0;
91953b6f406SXiaoyao Li
92053b6f406SXiaoyao Li return tdx_validate_attributes(tdx_guest, errp);
921bb3be394SXiaoyao Li }
922bb3be394SXiaoyao Li
setup_td_xfam(X86CPU * x86cpu,Error ** errp)923f15898b0SXiaoyao Li static int setup_td_xfam(X86CPU *x86cpu, Error **errp)
924f15898b0SXiaoyao Li {
925f15898b0SXiaoyao Li CPUX86State *env = &x86cpu->env;
926f15898b0SXiaoyao Li uint64_t xfam;
927f15898b0SXiaoyao Li
928f15898b0SXiaoyao Li xfam = env->features[FEAT_XSAVE_XCR0_LO] |
929f15898b0SXiaoyao Li env->features[FEAT_XSAVE_XCR0_HI] |
930f15898b0SXiaoyao Li env->features[FEAT_XSAVE_XSS_LO] |
931f15898b0SXiaoyao Li env->features[FEAT_XSAVE_XSS_HI];
932f15898b0SXiaoyao Li
933f15898b0SXiaoyao Li if (xfam & ~tdx_caps->supported_xfam) {
934*e7f926ebSCédric Le Goater error_setg(errp, "Invalid XFAM 0x%"PRIx64" for TDX VM (supported: 0x%"PRIx64"))",
935*e7f926ebSCédric Le Goater xfam, (uint64_t)tdx_caps->supported_xfam);
936f15898b0SXiaoyao Li return -1;
937f15898b0SXiaoyao Li }
938f15898b0SXiaoyao Li
939f15898b0SXiaoyao Li tdx_guest->xfam = xfam;
940f15898b0SXiaoyao Li return 0;
941f15898b0SXiaoyao Li }
942f15898b0SXiaoyao Li
tdx_filter_cpuid(struct kvm_cpuid2 * cpuids)943f15898b0SXiaoyao Li static void tdx_filter_cpuid(struct kvm_cpuid2 *cpuids)
944f15898b0SXiaoyao Li {
945f15898b0SXiaoyao Li int i, dest_cnt = 0;
946f15898b0SXiaoyao Li struct kvm_cpuid_entry2 *src, *dest, *conf;
947f15898b0SXiaoyao Li
948f15898b0SXiaoyao Li for (i = 0; i < cpuids->nent; i++) {
949f15898b0SXiaoyao Li src = cpuids->entries + i;
950f15898b0SXiaoyao Li conf = cpuid_find_entry(&tdx_caps->cpuid, src->function, src->index);
951f15898b0SXiaoyao Li if (!conf) {
952f15898b0SXiaoyao Li continue;
953f15898b0SXiaoyao Li }
954f15898b0SXiaoyao Li dest = cpuids->entries + dest_cnt;
955f15898b0SXiaoyao Li
956f15898b0SXiaoyao Li dest->function = src->function;
957f15898b0SXiaoyao Li dest->index = src->index;
958f15898b0SXiaoyao Li dest->flags = src->flags;
959f15898b0SXiaoyao Li dest->eax = src->eax & conf->eax;
960f15898b0SXiaoyao Li dest->ebx = src->ebx & conf->ebx;
961f15898b0SXiaoyao Li dest->ecx = src->ecx & conf->ecx;
962f15898b0SXiaoyao Li dest->edx = src->edx & conf->edx;
963f15898b0SXiaoyao Li
964f15898b0SXiaoyao Li dest_cnt++;
965f15898b0SXiaoyao Li }
966f15898b0SXiaoyao Li cpuids->nent = dest_cnt++;
967f15898b0SXiaoyao Li }
968f15898b0SXiaoyao Li
tdx_pre_create_vcpu(CPUState * cpu,Error ** errp)969f15898b0SXiaoyao Li int tdx_pre_create_vcpu(CPUState *cpu, Error **errp)
970f15898b0SXiaoyao Li {
971f15898b0SXiaoyao Li X86CPU *x86cpu = X86_CPU(cpu);
972f15898b0SXiaoyao Li CPUX86State *env = &x86cpu->env;
973f15898b0SXiaoyao Li g_autofree struct kvm_tdx_init_vm *init_vm = NULL;
974f15898b0SXiaoyao Li Error *local_err = NULL;
975d05a0858SIsaku Yamahata size_t data_len;
976f15898b0SXiaoyao Li int retry = 10000;
977f15898b0SXiaoyao Li int r = 0;
978f15898b0SXiaoyao Li
979f15898b0SXiaoyao Li QEMU_LOCK_GUARD(&tdx_guest->lock);
980f15898b0SXiaoyao Li if (tdx_guest->initialized) {
981f15898b0SXiaoyao Li return r;
982f15898b0SXiaoyao Li }
983f15898b0SXiaoyao Li
984f15898b0SXiaoyao Li init_vm = g_malloc0(sizeof(struct kvm_tdx_init_vm) +
985f15898b0SXiaoyao Li sizeof(struct kvm_cpuid_entry2) * KVM_MAX_CPUID_ENTRIES);
986f15898b0SXiaoyao Li
987d529a2acSXiaoyao Li if (!kvm_check_extension(kvm_state, KVM_CAP_X86_APIC_BUS_CYCLES_NS)) {
988d529a2acSXiaoyao Li error_setg(errp, "KVM doesn't support KVM_CAP_X86_APIC_BUS_CYCLES_NS");
989d529a2acSXiaoyao Li return -EOPNOTSUPP;
990d529a2acSXiaoyao Li }
991d529a2acSXiaoyao Li
992d529a2acSXiaoyao Li r = kvm_vm_enable_cap(kvm_state, KVM_CAP_X86_APIC_BUS_CYCLES_NS,
993d529a2acSXiaoyao Li 0, TDX_APIC_BUS_CYCLES_NS);
994d529a2acSXiaoyao Li if (r < 0) {
995d529a2acSXiaoyao Li error_setg_errno(errp, -r,
996d529a2acSXiaoyao Li "Unable to set core crystal clock frequency to 25MHz");
997d529a2acSXiaoyao Li return r;
998d529a2acSXiaoyao Li }
999d529a2acSXiaoyao Li
10000e73b843SXiaoyao Li if (env->tsc_khz && (env->tsc_khz < TDX_MIN_TSC_FREQUENCY_KHZ ||
10010e73b843SXiaoyao Li env->tsc_khz > TDX_MAX_TSC_FREQUENCY_KHZ)) {
1002*e7f926ebSCédric Le Goater error_setg(errp, "Invalid TSC %"PRId64" KHz, must specify cpu_frequency "
10030e73b843SXiaoyao Li "between [%d, %d] kHz", env->tsc_khz,
10040e73b843SXiaoyao Li TDX_MIN_TSC_FREQUENCY_KHZ, TDX_MAX_TSC_FREQUENCY_KHZ);
10050e73b843SXiaoyao Li return -EINVAL;
10060e73b843SXiaoyao Li }
10070e73b843SXiaoyao Li
10080e73b843SXiaoyao Li if (env->tsc_khz % (25 * 1000)) {
1009*e7f926ebSCédric Le Goater error_setg(errp, "Invalid TSC %"PRId64" KHz, it must be multiple of 25MHz",
10100e73b843SXiaoyao Li env->tsc_khz);
10110e73b843SXiaoyao Li return -EINVAL;
10120e73b843SXiaoyao Li }
10130e73b843SXiaoyao Li
10140e73b843SXiaoyao Li /* it's safe even env->tsc_khz is 0. KVM uses host's tsc_khz in this case */
10150e73b843SXiaoyao Li r = kvm_vm_ioctl(kvm_state, KVM_SET_TSC_KHZ, env->tsc_khz);
10160e73b843SXiaoyao Li if (r < 0) {
1017*e7f926ebSCédric Le Goater error_setg_errno(errp, -r, "Unable to set TSC frequency to %"PRId64" kHz",
10180e73b843SXiaoyao Li env->tsc_khz);
10190e73b843SXiaoyao Li return r;
10200e73b843SXiaoyao Li }
10210e73b843SXiaoyao Li
1022d05a0858SIsaku Yamahata if (tdx_guest->mrconfigid) {
1023d05a0858SIsaku Yamahata g_autofree uint8_t *data = qbase64_decode(tdx_guest->mrconfigid,
1024d05a0858SIsaku Yamahata strlen(tdx_guest->mrconfigid), &data_len, errp);
1025d05a0858SIsaku Yamahata if (!data) {
1026d05a0858SIsaku Yamahata return -1;
1027d05a0858SIsaku Yamahata }
1028d05a0858SIsaku Yamahata if (data_len != QCRYPTO_HASH_DIGEST_LEN_SHA384) {
1029d05a0858SIsaku Yamahata error_setg(errp, "TDX: failed to decode mrconfigid");
1030d05a0858SIsaku Yamahata return -1;
1031d05a0858SIsaku Yamahata }
1032d05a0858SIsaku Yamahata memcpy(init_vm->mrconfigid, data, data_len);
1033d05a0858SIsaku Yamahata }
1034d05a0858SIsaku Yamahata
1035d05a0858SIsaku Yamahata if (tdx_guest->mrowner) {
1036d05a0858SIsaku Yamahata g_autofree uint8_t *data = qbase64_decode(tdx_guest->mrowner,
1037d05a0858SIsaku Yamahata strlen(tdx_guest->mrowner), &data_len, errp);
1038d05a0858SIsaku Yamahata if (!data) {
1039d05a0858SIsaku Yamahata return -1;
1040d05a0858SIsaku Yamahata }
1041d05a0858SIsaku Yamahata if (data_len != QCRYPTO_HASH_DIGEST_LEN_SHA384) {
1042d05a0858SIsaku Yamahata error_setg(errp, "TDX: failed to decode mrowner");
1043d05a0858SIsaku Yamahata return -1;
1044d05a0858SIsaku Yamahata }
1045d05a0858SIsaku Yamahata memcpy(init_vm->mrowner, data, data_len);
1046d05a0858SIsaku Yamahata }
1047d05a0858SIsaku Yamahata
1048d05a0858SIsaku Yamahata if (tdx_guest->mrownerconfig) {
1049d05a0858SIsaku Yamahata g_autofree uint8_t *data = qbase64_decode(tdx_guest->mrownerconfig,
1050d05a0858SIsaku Yamahata strlen(tdx_guest->mrownerconfig), &data_len, errp);
1051d05a0858SIsaku Yamahata if (!data) {
1052d05a0858SIsaku Yamahata return -1;
1053d05a0858SIsaku Yamahata }
1054d05a0858SIsaku Yamahata if (data_len != QCRYPTO_HASH_DIGEST_LEN_SHA384) {
1055d05a0858SIsaku Yamahata error_setg(errp, "TDX: failed to decode mrownerconfig");
1056d05a0858SIsaku Yamahata return -1;
1057d05a0858SIsaku Yamahata }
1058d05a0858SIsaku Yamahata memcpy(init_vm->mrownerconfig, data, data_len);
1059d05a0858SIsaku Yamahata }
1060d05a0858SIsaku Yamahata
106153b6f406SXiaoyao Li r = setup_td_guest_attributes(x86cpu, errp);
106253b6f406SXiaoyao Li if (r) {
106353b6f406SXiaoyao Li return r;
106453b6f406SXiaoyao Li }
1065bb3be394SXiaoyao Li
1066f15898b0SXiaoyao Li r = setup_td_xfam(x86cpu, errp);
1067f15898b0SXiaoyao Li if (r) {
1068f15898b0SXiaoyao Li return r;
1069f15898b0SXiaoyao Li }
1070f15898b0SXiaoyao Li
1071f15898b0SXiaoyao Li init_vm->cpuid.nent = kvm_x86_build_cpuid(env, init_vm->cpuid.entries, 0);
1072f15898b0SXiaoyao Li tdx_filter_cpuid(&init_vm->cpuid);
1073f15898b0SXiaoyao Li
1074f15898b0SXiaoyao Li init_vm->attributes = tdx_guest->attributes;
1075f15898b0SXiaoyao Li init_vm->xfam = tdx_guest->xfam;
1076f15898b0SXiaoyao Li
1077f15898b0SXiaoyao Li /*
1078f15898b0SXiaoyao Li * KVM_TDX_INIT_VM gets -EAGAIN when KVM side SEAMCALL(TDH_MNG_CREATE)
1079f15898b0SXiaoyao Li * gets TDX_RND_NO_ENTROPY due to Random number generation (e.g., RDRAND or
1080f15898b0SXiaoyao Li * RDSEED) is busy.
1081f15898b0SXiaoyao Li *
1082f15898b0SXiaoyao Li * Retry for the case.
1083f15898b0SXiaoyao Li */
1084f15898b0SXiaoyao Li do {
1085f15898b0SXiaoyao Li error_free(local_err);
1086f15898b0SXiaoyao Li local_err = NULL;
1087f15898b0SXiaoyao Li r = tdx_vm_ioctl(KVM_TDX_INIT_VM, 0, init_vm, &local_err);
1088f15898b0SXiaoyao Li } while (r == -EAGAIN && --retry);
1089f15898b0SXiaoyao Li
1090f15898b0SXiaoyao Li if (r < 0) {
1091f15898b0SXiaoyao Li if (!retry) {
1092f15898b0SXiaoyao Li error_append_hint(&local_err, "Hardware RNG (Random Number "
1093f15898b0SXiaoyao Li "Generator) is busy occupied by someone (via RDRAND/RDSEED) "
1094f15898b0SXiaoyao Li "maliciously, which leads to KVM_TDX_INIT_VM keeping failure "
1095f15898b0SXiaoyao Li "due to lack of entropy.\n");
1096f15898b0SXiaoyao Li }
1097f15898b0SXiaoyao Li error_propagate(errp, local_err);
1098f15898b0SXiaoyao Li return r;
1099f15898b0SXiaoyao Li }
1100f15898b0SXiaoyao Li
1101f15898b0SXiaoyao Li tdx_guest->initialized = true;
1102f15898b0SXiaoyao Li
1103f15898b0SXiaoyao Li return 0;
1104f15898b0SXiaoyao Li }
1105f15898b0SXiaoyao Li
tdx_parse_tdvf(void * flash_ptr,int size)1106cb5d65a8SXiaoyao Li int tdx_parse_tdvf(void *flash_ptr, int size)
1107cb5d65a8SXiaoyao Li {
1108cb5d65a8SXiaoyao Li return tdvf_parse_metadata(&tdx_guest->tdvf, flash_ptr, size);
1109cb5d65a8SXiaoyao Li }
1110cb5d65a8SXiaoyao Li
/*
 * Report a guest panic of type TDX.
 *
 * @error_code: stored truncated to 32 bits
 * @message:    stored as-is (the pointer is handed over, not copied)
 * @gpa:        stored as-is
 *
 * The freshly allocated panic information is passed to
 * qemu_system_guest_panicked().
 */
static void tdx_panicked_on_fatal_error(X86CPU *cpu, uint64_t error_code,
                                        char *message, uint64_t gpa)
{
    GuestPanicInformation *info = g_new0(GuestPanicInformation, 1);

    info->type = GUEST_PANIC_INFORMATION_TYPE_TDX;
    info->u.tdx.error_code = (uint32_t) error_code;
    info->u.tdx.message = message;
    info->u.tdx.gpa = gpa;

    qemu_system_guest_panicked(info);
}
11246e250463SXiaoyao Li
112598dbfd68SXiaoyao Li /*
112698dbfd68SXiaoyao Li * Only 8 registers can contain valid ASCII byte stream to form the fatal
112798dbfd68SXiaoyao Li * message, and their sequence is: R14, R15, RBX, RDI, RSI, R8, R9, RDX
112898dbfd68SXiaoyao Li */
112998dbfd68SXiaoyao Li #define TDX_FATAL_MESSAGE_MAX 64
113098dbfd68SXiaoyao Li
11316e250463SXiaoyao Li #define TDX_REPORT_FATAL_ERROR_GPA_VALID BIT_ULL(63)
11326e250463SXiaoyao Li
tdx_handle_report_fatal_error(X86CPU * cpu,struct kvm_run * run)113398dbfd68SXiaoyao Li int tdx_handle_report_fatal_error(X86CPU *cpu, struct kvm_run *run)
113498dbfd68SXiaoyao Li {
113598dbfd68SXiaoyao Li uint64_t error_code = run->system_event.data[R_R12];
113698dbfd68SXiaoyao Li uint64_t reg_mask = run->system_event.data[R_ECX];
113798dbfd68SXiaoyao Li char *message = NULL;
113898dbfd68SXiaoyao Li uint64_t *tmp;
11396e250463SXiaoyao Li uint64_t gpa = -1ull;
114098dbfd68SXiaoyao Li
114198dbfd68SXiaoyao Li if (error_code & 0xffff) {
1142*e7f926ebSCédric Le Goater error_report("TDX: REPORT_FATAL_ERROR: invalid error code: 0x%"PRIx64,
114398dbfd68SXiaoyao Li error_code);
114498dbfd68SXiaoyao Li return -1;
114598dbfd68SXiaoyao Li }
114698dbfd68SXiaoyao Li
114798dbfd68SXiaoyao Li if (reg_mask) {
114898dbfd68SXiaoyao Li message = g_malloc0(TDX_FATAL_MESSAGE_MAX + 1);
114998dbfd68SXiaoyao Li tmp = (uint64_t *)message;
115098dbfd68SXiaoyao Li
115198dbfd68SXiaoyao Li #define COPY_REG(REG) \
115298dbfd68SXiaoyao Li do { \
115398dbfd68SXiaoyao Li if (reg_mask & BIT_ULL(REG)) { \
115498dbfd68SXiaoyao Li *(tmp++) = run->system_event.data[REG]; \
115598dbfd68SXiaoyao Li } \
115698dbfd68SXiaoyao Li } while (0)
115798dbfd68SXiaoyao Li
115898dbfd68SXiaoyao Li COPY_REG(R_R14);
115998dbfd68SXiaoyao Li COPY_REG(R_R15);
116098dbfd68SXiaoyao Li COPY_REG(R_EBX);
116198dbfd68SXiaoyao Li COPY_REG(R_EDI);
116298dbfd68SXiaoyao Li COPY_REG(R_ESI);
116398dbfd68SXiaoyao Li COPY_REG(R_R8);
116498dbfd68SXiaoyao Li COPY_REG(R_R9);
116598dbfd68SXiaoyao Li COPY_REG(R_EDX);
116698dbfd68SXiaoyao Li *((char *)tmp) = '\0';
116798dbfd68SXiaoyao Li }
116898dbfd68SXiaoyao Li #undef COPY_REG
116998dbfd68SXiaoyao Li
11706e250463SXiaoyao Li if (error_code & TDX_REPORT_FATAL_ERROR_GPA_VALID) {
11716e250463SXiaoyao Li gpa = run->system_event.data[R_R13];
11726e250463SXiaoyao Li }
11736e250463SXiaoyao Li
11746e250463SXiaoyao Li tdx_panicked_on_fatal_error(cpu, error_code, message, gpa);
11756e250463SXiaoyao Li
117698dbfd68SXiaoyao Li return -1;
117798dbfd68SXiaoyao Li }
117898dbfd68SXiaoyao Li
tdx_guest_get_sept_ve_disable(Object * obj,Error ** errp)11796016e297SXiaoyao Li static bool tdx_guest_get_sept_ve_disable(Object *obj, Error **errp)
11806016e297SXiaoyao Li {
11816016e297SXiaoyao Li TdxGuest *tdx = TDX_GUEST(obj);
11826016e297SXiaoyao Li
11836016e297SXiaoyao Li return !!(tdx->attributes & TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE);
11846016e297SXiaoyao Li }
11856016e297SXiaoyao Li
tdx_guest_set_sept_ve_disable(Object * obj,bool value,Error ** errp)11866016e297SXiaoyao Li static void tdx_guest_set_sept_ve_disable(Object *obj, bool value, Error **errp)
11876016e297SXiaoyao Li {
11886016e297SXiaoyao Li TdxGuest *tdx = TDX_GUEST(obj);
11896016e297SXiaoyao Li
11906016e297SXiaoyao Li if (value) {
11916016e297SXiaoyao Li tdx->attributes |= TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE;
11926016e297SXiaoyao Li } else {
11936016e297SXiaoyao Li tdx->attributes &= ~TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE;
11946016e297SXiaoyao Li }
11956016e297SXiaoyao Li }
11966016e297SXiaoyao Li
tdx_guest_get_mrconfigid(Object * obj,Error ** errp)1197d05a0858SIsaku Yamahata static char *tdx_guest_get_mrconfigid(Object *obj, Error **errp)
1198d05a0858SIsaku Yamahata {
1199d05a0858SIsaku Yamahata TdxGuest *tdx = TDX_GUEST(obj);
1200d05a0858SIsaku Yamahata
1201d05a0858SIsaku Yamahata return g_strdup(tdx->mrconfigid);
1202d05a0858SIsaku Yamahata }
1203d05a0858SIsaku Yamahata
tdx_guest_set_mrconfigid(Object * obj,const char * value,Error ** errp)1204d05a0858SIsaku Yamahata static void tdx_guest_set_mrconfigid(Object *obj, const char *value, Error **errp)
1205d05a0858SIsaku Yamahata {
1206d05a0858SIsaku Yamahata TdxGuest *tdx = TDX_GUEST(obj);
1207d05a0858SIsaku Yamahata
1208d05a0858SIsaku Yamahata g_free(tdx->mrconfigid);
1209d05a0858SIsaku Yamahata tdx->mrconfigid = g_strdup(value);
1210d05a0858SIsaku Yamahata }
1211d05a0858SIsaku Yamahata
tdx_guest_get_mrowner(Object * obj,Error ** errp)1212d05a0858SIsaku Yamahata static char *tdx_guest_get_mrowner(Object *obj, Error **errp)
1213d05a0858SIsaku Yamahata {
1214d05a0858SIsaku Yamahata TdxGuest *tdx = TDX_GUEST(obj);
1215d05a0858SIsaku Yamahata
1216d05a0858SIsaku Yamahata return g_strdup(tdx->mrowner);
1217d05a0858SIsaku Yamahata }
1218d05a0858SIsaku Yamahata
tdx_guest_set_mrowner(Object * obj,const char * value,Error ** errp)1219d05a0858SIsaku Yamahata static void tdx_guest_set_mrowner(Object *obj, const char *value, Error **errp)
1220d05a0858SIsaku Yamahata {
1221d05a0858SIsaku Yamahata TdxGuest *tdx = TDX_GUEST(obj);
1222d05a0858SIsaku Yamahata
1223d05a0858SIsaku Yamahata g_free(tdx->mrowner);
1224d05a0858SIsaku Yamahata tdx->mrowner = g_strdup(value);
1225d05a0858SIsaku Yamahata }
1226d05a0858SIsaku Yamahata
tdx_guest_get_mrownerconfig(Object * obj,Error ** errp)1227d05a0858SIsaku Yamahata static char *tdx_guest_get_mrownerconfig(Object *obj, Error **errp)
1228d05a0858SIsaku Yamahata {
1229d05a0858SIsaku Yamahata TdxGuest *tdx = TDX_GUEST(obj);
1230d05a0858SIsaku Yamahata
1231d05a0858SIsaku Yamahata return g_strdup(tdx->mrownerconfig);
1232d05a0858SIsaku Yamahata }
1233d05a0858SIsaku Yamahata
tdx_guest_set_mrownerconfig(Object * obj,const char * value,Error ** errp)1234d05a0858SIsaku Yamahata static void tdx_guest_set_mrownerconfig(Object *obj, const char *value, Error **errp)
1235d05a0858SIsaku Yamahata {
1236d05a0858SIsaku Yamahata TdxGuest *tdx = TDX_GUEST(obj);
1237d05a0858SIsaku Yamahata
1238d05a0858SIsaku Yamahata g_free(tdx->mrownerconfig);
1239d05a0858SIsaku Yamahata tdx->mrownerconfig = g_strdup(value);
1240d05a0858SIsaku Yamahata }
1241d05a0858SIsaku Yamahata
1242756e12e7SXiaoyao Li /* tdx guest */
1243756e12e7SXiaoyao Li OBJECT_DEFINE_TYPE_WITH_INTERFACES(TdxGuest,
1244756e12e7SXiaoyao Li tdx_guest,
1245756e12e7SXiaoyao Li TDX_GUEST,
1246756e12e7SXiaoyao Li X86_CONFIDENTIAL_GUEST,
1247756e12e7SXiaoyao Li { TYPE_USER_CREATABLE },
1248756e12e7SXiaoyao Li { NULL })
1249756e12e7SXiaoyao Li
/*
 * Instance initializer for TdxGuest.
 *
 * Initializes the object's lock, requires guest_memfd for the confidential
 * guest, defaults the attributes to TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE, and
 * registers the "attributes", "sept-ve-disable", "mrconfigid", "mrowner" and
 * "mrownerconfig" properties.
 */
static void tdx_guest_init(Object *obj)
{
    ConfidentialGuestSupport *cgs = CONFIDENTIAL_GUEST_SUPPORT(obj);
    TdxGuest *guest = TDX_GUEST(obj);

    qemu_mutex_init(&guest->lock);

    cgs->require_guest_memfd = true;
    guest->attributes = TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE;

    /* Raw 64-bit view of the attributes, readable and writable */
    object_property_add_uint64_ptr(obj, "attributes", &guest->attributes,
                                   OBJ_PROP_FLAG_READWRITE);

    /* Boolean view of a single attribute bit */
    object_property_add_bool(obj, "sept-ve-disable",
                             tdx_guest_get_sept_ve_disable,
                             tdx_guest_set_sept_ve_disable);

    /* String properties consumed later by tdx_pre_create_vcpu() */
    object_property_add_str(obj, "mrconfigid",
                            tdx_guest_get_mrconfigid,
                            tdx_guest_set_mrconfigid);
    object_property_add_str(obj, "mrowner",
                            tdx_guest_get_mrowner,
                            tdx_guest_set_mrowner);
    object_property_add_str(obj, "mrownerconfig",
                            tdx_guest_get_mrownerconfig,
                            tdx_guest_set_mrownerconfig);
}
1274756e12e7SXiaoyao Li
/*
 * Instance finalizer for TdxGuest.
 *
 * Releases what the instance owns: the strings stored by the "mrconfigid",
 * "mrowner" and "mrownerconfig" property setters (each a g_strdup() copy, or
 * NULL if never set) and the lock initialized in tdx_guest_init().
 */
static void tdx_guest_finalize(Object *obj)
{
    TdxGuest *tdx = TDX_GUEST(obj);

    /* g_free(NULL) is a no-op, so unset properties need no special casing */
    g_free(tdx->mrconfigid);
    g_free(tdx->mrowner);
    g_free(tdx->mrownerconfig);

    qemu_mutex_destroy(&tdx->lock);
}
1278756e12e7SXiaoyao Li
/*
 * Class initializer for TdxGuest: installs the TDX implementations of the
 * ConfidentialGuestSupport and X86ConfidentialGuest class hooks.
 */
static void tdx_guest_class_init(ObjectClass *oc, const void *data)
{
    ConfidentialGuestSupportClass *cgs_class =
        CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc);
    X86ConfidentialGuestClass *x86_class = X86_CONFIDENTIAL_GUEST_CLASS(oc);

    /* Generic confidential-guest hook */
    cgs_class->kvm_init = tdx_kvm_init;

    /* x86-specific hooks */
    x86_class->kvm_type = tdx_kvm_type;
    x86_class->cpu_instance_init = tdx_cpu_instance_init;
    x86_class->adjust_cpuid_features = tdx_adjust_cpuid_features;
    x86_class->check_features = tdx_check_features;
}
1290