1 /* 2 * QEMU TDX support 3 * 4 * Copyright (c) 2025 Intel Corporation 5 * 6 * Author: 7 * Xiaoyao Li <xiaoyao.li@intel.com> 8 * 9 * SPDX-License-Identifier: GPL-2.0-or-later 10 */ 11 12 #include "qemu/osdep.h" 13 #include "qemu/error-report.h" 14 #include "qemu/base64.h" 15 #include "qemu/mmap-alloc.h" 16 #include "qapi/error.h" 17 #include "qom/object_interfaces.h" 18 #include "crypto/hash.h" 19 #include "system/kvm_int.h" 20 #include "system/runstate.h" 21 #include "system/system.h" 22 #include "system/ramblock.h" 23 24 #include <linux/kvm_para.h> 25 26 #include "hw/i386/e820_memory_layout.h" 27 #include "hw/i386/tdvf.h" 28 #include "hw/i386/x86.h" 29 #include "hw/i386/tdvf-hob.h" 30 #include "kvm_i386.h" 31 #include "tdx.h" 32 33 #define TDX_MIN_TSC_FREQUENCY_KHZ (100 * 1000) 34 #define TDX_MAX_TSC_FREQUENCY_KHZ (10 * 1000 * 1000) 35 36 #define TDX_TD_ATTRIBUTES_DEBUG BIT_ULL(0) 37 #define TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE BIT_ULL(28) 38 #define TDX_TD_ATTRIBUTES_PKS BIT_ULL(30) 39 #define TDX_TD_ATTRIBUTES_PERFMON BIT_ULL(63) 40 41 #define TDX_SUPPORTED_TD_ATTRS (TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE |\ 42 TDX_TD_ATTRIBUTES_PKS | \ 43 TDX_TD_ATTRIBUTES_PERFMON) 44 45 static TdxGuest *tdx_guest; 46 47 static struct kvm_tdx_capabilities *tdx_caps; 48 49 /* Valid after kvm_arch_init()->confidential_guest_kvm_init()->tdx_kvm_init() */ 50 bool is_tdx_vm(void) 51 { 52 return !!tdx_guest; 53 } 54 55 enum tdx_ioctl_level { 56 TDX_VM_IOCTL, 57 TDX_VCPU_IOCTL, 58 }; 59 60 static int tdx_ioctl_internal(enum tdx_ioctl_level level, void *state, 61 int cmd_id, __u32 flags, void *data, 62 Error **errp) 63 { 64 struct kvm_tdx_cmd tdx_cmd = {}; 65 int r; 66 67 const char *tdx_ioctl_name[] = { 68 [KVM_TDX_CAPABILITIES] = "KVM_TDX_CAPABILITIES", 69 [KVM_TDX_INIT_VM] = "KVM_TDX_INIT_VM", 70 [KVM_TDX_INIT_VCPU] = "KVM_TDX_INIT_VCPU", 71 [KVM_TDX_INIT_MEM_REGION] = "KVM_TDX_INIT_MEM_REGION", 72 [KVM_TDX_FINALIZE_VM] = "KVM_TDX_FINALIZE_VM", 73 [KVM_TDX_GET_CPUID] = "KVM_TDX_GET_CPUID", 74 }; 75 76 tdx_cmd.id = cmd_id; 77 tdx_cmd.flags = flags; 78 tdx_cmd.data = (__u64)(unsigned long)data; 79 80 switch (level) { 81 case TDX_VM_IOCTL: 82 r = kvm_vm_ioctl(kvm_state, KVM_MEMORY_ENCRYPT_OP, &tdx_cmd); 83 break; 84 case TDX_VCPU_IOCTL: 85 r = kvm_vcpu_ioctl(state, KVM_MEMORY_ENCRYPT_OP, &tdx_cmd); 86 break; 87 default: 88 error_setg(errp, "Invalid tdx_ioctl_level %d", level); 89 return -EINVAL; 90 } 91 92 if (r < 0) { 93 error_setg_errno(errp, -r, "TDX ioctl %s failed, hw_errors: 0x%llx", 94 tdx_ioctl_name[cmd_id], tdx_cmd.hw_error); 95 } 96 return r; 97 } 98 99 static inline int tdx_vm_ioctl(int cmd_id, __u32 flags, void *data, 100 Error **errp) 101 { 102 return tdx_ioctl_internal(TDX_VM_IOCTL, NULL, cmd_id, flags, data, errp); 103 } 104 105 static inline int tdx_vcpu_ioctl(CPUState *cpu, int cmd_id, __u32 flags, 106 void *data, Error **errp) 107 { 108 return tdx_ioctl_internal(TDX_VCPU_IOCTL, cpu, cmd_id, flags, data, errp); 109 } 110 111 static int get_tdx_capabilities(Error **errp) 112 { 113 struct kvm_tdx_capabilities *caps; 114 /* 1st generation of TDX reports 6 cpuid configs */ 115 int nr_cpuid_configs = 6; 116 size_t size; 117 int r; 118 119 do { 120 Error *local_err = NULL; 121 size = sizeof(struct kvm_tdx_capabilities) + 122 nr_cpuid_configs * sizeof(struct kvm_cpuid_entry2); 123 caps = g_malloc0(size); 124 caps->cpuid.nent = nr_cpuid_configs; 125 126 r = tdx_vm_ioctl(KVM_TDX_CAPABILITIES, 0, caps, &local_err); 127 if (r == -E2BIG) { 128 g_free(caps); 129 nr_cpuid_configs *= 2; 130 if (nr_cpuid_configs > KVM_MAX_CPUID_ENTRIES) { 131 error_report("KVM TDX seems broken that number of CPUID entries" 132 " in kvm_tdx_capabilities exceeds limit: %d", 133 KVM_MAX_CPUID_ENTRIES); 134 error_propagate(errp, local_err); 135 return r; 136 } 137 error_free(local_err); 138 } else if (r < 0) { 139 g_free(caps); 140 error_propagate(errp, local_err); 141 return r; 142 } 143 } while (r == -E2BIG); 144 145 tdx_caps = caps; 146 147 return 0; 148 } 149 150 void tdx_set_tdvf_region(MemoryRegion *tdvf_mr) 151 { 152 assert(!tdx_guest->tdvf_mr); 153 tdx_guest->tdvf_mr = tdvf_mr; 154 } 155 156 static TdxFirmwareEntry *tdx_get_hob_entry(TdxGuest *tdx) 157 { 158 TdxFirmwareEntry *entry; 159 160 for_each_tdx_fw_entry(&tdx->tdvf, entry) { 161 if (entry->type == TDVF_SECTION_TYPE_TD_HOB) { 162 return entry; 163 } 164 } 165 error_report("TDVF metadata doesn't specify TD_HOB location."); 166 exit(1); 167 } 168 169 static void tdx_add_ram_entry(uint64_t address, uint64_t length, 170 enum TdxRamType type) 171 { 172 uint32_t nr_entries = tdx_guest->nr_ram_entries; 173 tdx_guest->ram_entries = g_renew(TdxRamEntry, tdx_guest->ram_entries, 174 nr_entries + 1); 175 176 tdx_guest->ram_entries[nr_entries].address = address; 177 tdx_guest->ram_entries[nr_entries].length = length; 178 tdx_guest->ram_entries[nr_entries].type = type; 179 tdx_guest->nr_ram_entries++; 180 } 181 182 static int tdx_accept_ram_range(uint64_t address, uint64_t length) 183 { 184 uint64_t head_start, tail_start, head_length, tail_length; 185 uint64_t tmp_address, tmp_length; 186 TdxRamEntry *e; 187 int i = 0; 188 189 do { 190 if (i == tdx_guest->nr_ram_entries) { 191 return -1; 192 } 193 194 e = &tdx_guest->ram_entries[i++]; 195 } while (address + length <= e->address || address >= e->address + e->length); 196 197 /* 198 * The to-be-accepted ram range must be fully contained by one 199 * RAM entry. 200 */ 201 if (e->address > address || 202 e->address + e->length < address + length) { 203 return -1; 204 } 205 206 if (e->type == TDX_RAM_ADDED) { 207 return 0; 208 } 209 210 tmp_address = e->address; 211 tmp_length = e->length; 212 213 e->address = address; 214 e->length = length; 215 e->type = TDX_RAM_ADDED; 216 217 head_length = address - tmp_address; 218 if (head_length > 0) { 219 head_start = tmp_address; 220 tdx_add_ram_entry(head_start, head_length, TDX_RAM_UNACCEPTED); 221 } 222 223 tail_start = address + length; 224 if (tail_start < tmp_address + tmp_length) { 225 tail_length = tmp_address + tmp_length - tail_start; 226 tdx_add_ram_entry(tail_start, tail_length, TDX_RAM_UNACCEPTED); 227 } 228 229 return 0; 230 } 231 232 static int tdx_ram_entry_compare(const void *lhs_, const void* rhs_) 233 { 234 const TdxRamEntry *lhs = lhs_; 235 const TdxRamEntry *rhs = rhs_; 236 237 if (lhs->address == rhs->address) { 238 return 0; 239 } 240 if (le64_to_cpu(lhs->address) > le64_to_cpu(rhs->address)) { 241 return 1; 242 } 243 return -1; 244 } 245 246 static void tdx_init_ram_entries(void) 247 { 248 unsigned i, j, nr_e820_entries; 249 250 nr_e820_entries = e820_get_table(NULL); 251 tdx_guest->ram_entries = g_new(TdxRamEntry, nr_e820_entries); 252 253 for (i = 0, j = 0; i < nr_e820_entries; i++) { 254 uint64_t addr, len; 255 256 if (e820_get_entry(i, E820_RAM, &addr, &len)) { 257 tdx_guest->ram_entries[j].address = addr; 258 tdx_guest->ram_entries[j].length = len; 259 tdx_guest->ram_entries[j].type = TDX_RAM_UNACCEPTED; 260 j++; 261 } 262 } 263 tdx_guest->nr_ram_entries = j; 264 } 265 266 static void tdx_post_init_vcpus(void) 267 { 268 TdxFirmwareEntry *hob; 269 CPUState *cpu; 270 271 hob = tdx_get_hob_entry(tdx_guest); 272 CPU_FOREACH(cpu) { 273 tdx_vcpu_ioctl(cpu, KVM_TDX_INIT_VCPU, 0, (void *)hob->address, 274 &error_fatal); 275 } 276 } 277 278 static void tdx_finalize_vm(Notifier *notifier, void *unused) 279 { 280 TdxFirmware *tdvf = &tdx_guest->tdvf; 281 TdxFirmwareEntry *entry; 282 RAMBlock *ram_block; 283 Error *local_err = NULL; 284 int r; 285 286 tdx_init_ram_entries(); 287 288 for_each_tdx_fw_entry(tdvf, entry) { 289 switch (entry->type) { 290 case TDVF_SECTION_TYPE_BFV: 291 case TDVF_SECTION_TYPE_CFV: 292 entry->mem_ptr = tdvf->mem_ptr + entry->data_offset; 293 break; 294 case TDVF_SECTION_TYPE_TD_HOB: 295 case TDVF_SECTION_TYPE_TEMP_MEM: 296 entry->mem_ptr = qemu_ram_mmap(-1, entry->size, 297 qemu_real_host_page_size(), 0, 0); 298 if (entry->mem_ptr == MAP_FAILED) { 299 error_report("Failed to mmap memory for TDVF section %d", 300 entry->type); 301 exit(1); 302 } 303 if (tdx_accept_ram_range(entry->address, entry->size)) { 304 error_report("Failed to accept memory for TDVF section %d", 305 entry->type); 306 qemu_ram_munmap(-1, entry->mem_ptr, entry->size); 307 exit(1); 308 } 309 break; 310 default: 311 error_report("Unsupported TDVF section %d", entry->type); 312 exit(1); 313 } 314 } 315 316 qsort(tdx_guest->ram_entries, tdx_guest->nr_ram_entries, 317 sizeof(TdxRamEntry), &tdx_ram_entry_compare); 318 319 tdvf_hob_create(tdx_guest, tdx_get_hob_entry(tdx_guest)); 320 321 tdx_post_init_vcpus(); 322 323 for_each_tdx_fw_entry(tdvf, entry) { 324 struct kvm_tdx_init_mem_region region; 325 uint32_t flags; 326 327 region = (struct kvm_tdx_init_mem_region) { 328 .source_addr = (uint64_t)entry->mem_ptr, 329 .gpa = entry->address, 330 .nr_pages = entry->size >> 12, 331 }; 332 333 flags = entry->attributes & TDVF_SECTION_ATTRIBUTES_MR_EXTEND ? 334 KVM_TDX_MEASURE_MEMORY_REGION : 0; 335 336 do { 337 error_free(local_err); 338 local_err = NULL; 339 r = tdx_vcpu_ioctl(first_cpu, KVM_TDX_INIT_MEM_REGION, flags, 340 ®ion, &local_err); 341 } while (r == -EAGAIN || r == -EINTR); 342 if (r < 0) { 343 error_report_err(local_err); 344 exit(1); 345 } 346 347 if (entry->type == TDVF_SECTION_TYPE_TD_HOB || 348 entry->type == TDVF_SECTION_TYPE_TEMP_MEM) { 349 qemu_ram_munmap(-1, entry->mem_ptr, entry->size); 350 entry->mem_ptr = NULL; 351 } 352 } 353 354 /* 355 * TDVF image has been copied into private region above via 356 * KVM_MEMORY_MAPPING. It becomes useless. 357 */ 358 ram_block = tdx_guest->tdvf_mr->ram_block; 359 ram_block_discard_range(ram_block, 0, ram_block->max_length); 360 361 tdx_vm_ioctl(KVM_TDX_FINALIZE_VM, 0, NULL, &error_fatal); 362 CONFIDENTIAL_GUEST_SUPPORT(tdx_guest)->ready = true; 363 } 364 365 static Notifier tdx_machine_done_notify = { 366 .notify = tdx_finalize_vm, 367 }; 368 369 static int tdx_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) 370 { 371 MachineState *ms = MACHINE(qdev_get_machine()); 372 X86MachineState *x86ms = X86_MACHINE(ms); 373 TdxGuest *tdx = TDX_GUEST(cgs); 374 int r = 0; 375 376 kvm_mark_guest_state_protected(); 377 378 if (x86ms->smm == ON_OFF_AUTO_AUTO) { 379 x86ms->smm = ON_OFF_AUTO_OFF; 380 } else if (x86ms->smm == ON_OFF_AUTO_ON) { 381 error_setg(errp, "TDX VM doesn't support SMM"); 382 return -EINVAL; 383 } 384 385 if (x86ms->pic == ON_OFF_AUTO_AUTO) { 386 x86ms->pic = ON_OFF_AUTO_OFF; 387 } else if (x86ms->pic == ON_OFF_AUTO_ON) { 388 error_setg(errp, "TDX VM doesn't support PIC"); 389 return -EINVAL; 390 } 391 392 if (kvm_state->kernel_irqchip_split == ON_OFF_AUTO_AUTO) { 393 kvm_state->kernel_irqchip_split = ON_OFF_AUTO_ON; 394 } else if (kvm_state->kernel_irqchip_split != ON_OFF_AUTO_ON) { 395 error_setg(errp, "TDX VM requires kernel_irqchip to be split"); 396 return -EINVAL; 397 } 398 399 if (!tdx_caps) { 400 r = get_tdx_capabilities(errp); 401 if (r) { 402 return r; 403 } 404 } 405 406 /* TDX relies on KVM_HC_MAP_GPA_RANGE to handle TDG.VP.VMCALL<MapGPA> */ 407 if (!kvm_enable_hypercall(BIT_ULL(KVM_HC_MAP_GPA_RANGE))) { 408 return -EOPNOTSUPP; 409 } 410 411 /* 412 * Set kvm_readonly_mem_allowed to false, because TDX only supports readonly 413 * memory for shared memory but not for private memory. Besides, whether a 414 * memslot is private or shared is not determined by QEMU. 415 * 416 * Thus, just mark readonly memory not supported for simplicity. 417 */ 418 kvm_readonly_mem_allowed = false; 419 420 qemu_add_machine_init_done_notifier(&tdx_machine_done_notify); 421 422 tdx_guest = tdx; 423 return 0; 424 } 425 426 static int tdx_kvm_type(X86ConfidentialGuest *cg) 427 { 428 /* Do the object check */ 429 TDX_GUEST(cg); 430 431 return KVM_X86_TDX_VM; 432 } 433 434 static void tdx_cpu_instance_init(X86ConfidentialGuest *cg, CPUState *cpu) 435 { 436 X86CPU *x86cpu = X86_CPU(cpu); 437 438 object_property_set_bool(OBJECT(cpu), "pmu", false, &error_abort); 439 440 x86cpu->enable_cpuid_0x1f = true; 441 } 442 443 static int tdx_validate_attributes(TdxGuest *tdx, Error **errp) 444 { 445 if ((tdx->attributes & ~tdx_caps->supported_attrs)) { 446 error_setg(errp, "Invalid attributes 0x%lx for TDX VM " 447 "(KVM supported: 0x%llx)", tdx->attributes, 448 tdx_caps->supported_attrs); 449 return -1; 450 } 451 452 if (tdx->attributes & ~TDX_SUPPORTED_TD_ATTRS) { 453 error_setg(errp, "Some QEMU unsupported TD attribute bits being " 454 "requested: 0x%lx (QEMU supported: 0x%llx)", 455 tdx->attributes, TDX_SUPPORTED_TD_ATTRS); 456 return -1; 457 } 458 459 return 0; 460 } 461 462 static int setup_td_guest_attributes(X86CPU *x86cpu, Error **errp) 463 { 464 CPUX86State *env = &x86cpu->env; 465 466 tdx_guest->attributes |= (env->features[FEAT_7_0_ECX] & CPUID_7_0_ECX_PKS) ? 467 TDX_TD_ATTRIBUTES_PKS : 0; 468 tdx_guest->attributes |= x86cpu->enable_pmu ? TDX_TD_ATTRIBUTES_PERFMON : 0; 469 470 return tdx_validate_attributes(tdx_guest, errp); 471 } 472 473 static int setup_td_xfam(X86CPU *x86cpu, Error **errp) 474 { 475 CPUX86State *env = &x86cpu->env; 476 uint64_t xfam; 477 478 xfam = env->features[FEAT_XSAVE_XCR0_LO] | 479 env->features[FEAT_XSAVE_XCR0_HI] | 480 env->features[FEAT_XSAVE_XSS_LO] | 481 env->features[FEAT_XSAVE_XSS_HI]; 482 483 if (xfam & ~tdx_caps->supported_xfam) { 484 error_setg(errp, "Invalid XFAM 0x%lx for TDX VM (supported: 0x%llx))", 485 xfam, tdx_caps->supported_xfam); 486 return -1; 487 } 488 489 tdx_guest->xfam = xfam; 490 return 0; 491 } 492 493 static void tdx_filter_cpuid(struct kvm_cpuid2 *cpuids) 494 { 495 int i, dest_cnt = 0; 496 struct kvm_cpuid_entry2 *src, *dest, *conf; 497 498 for (i = 0; i < cpuids->nent; i++) { 499 src = cpuids->entries + i; 500 conf = cpuid_find_entry(&tdx_caps->cpuid, src->function, src->index); 501 if (!conf) { 502 continue; 503 } 504 dest = cpuids->entries + dest_cnt; 505 506 dest->function = src->function; 507 dest->index = src->index; 508 dest->flags = src->flags; 509 dest->eax = src->eax & conf->eax; 510 dest->ebx = src->ebx & conf->ebx; 511 dest->ecx = src->ecx & conf->ecx; 512 dest->edx = src->edx & conf->edx; 513 514 dest_cnt++; 515 } 516 cpuids->nent = dest_cnt++; 517 } 518 519 int tdx_pre_create_vcpu(CPUState *cpu, Error **errp) 520 { 521 X86CPU *x86cpu = X86_CPU(cpu); 522 CPUX86State *env = &x86cpu->env; 523 g_autofree struct kvm_tdx_init_vm *init_vm = NULL; 524 Error *local_err = NULL; 525 size_t data_len; 526 int retry = 10000; 527 int r = 0; 528 529 QEMU_LOCK_GUARD(&tdx_guest->lock); 530 if (tdx_guest->initialized) { 531 return r; 532 } 533 534 init_vm = g_malloc0(sizeof(struct kvm_tdx_init_vm) + 535 sizeof(struct kvm_cpuid_entry2) * KVM_MAX_CPUID_ENTRIES); 536 537 if (!kvm_check_extension(kvm_state, KVM_CAP_X86_APIC_BUS_CYCLES_NS)) { 538 error_setg(errp, "KVM doesn't support KVM_CAP_X86_APIC_BUS_CYCLES_NS"); 539 return -EOPNOTSUPP; 540 } 541 542 r = kvm_vm_enable_cap(kvm_state, KVM_CAP_X86_APIC_BUS_CYCLES_NS, 543 0, TDX_APIC_BUS_CYCLES_NS); 544 if (r < 0) { 545 error_setg_errno(errp, -r, 546 "Unable to set core crystal clock frequency to 25MHz"); 547 return r; 548 } 549 550 if (env->tsc_khz && (env->tsc_khz < TDX_MIN_TSC_FREQUENCY_KHZ || 551 env->tsc_khz > TDX_MAX_TSC_FREQUENCY_KHZ)) { 552 error_setg(errp, "Invalid TSC %ld KHz, must specify cpu_frequency " 553 "between [%d, %d] kHz", env->tsc_khz, 554 TDX_MIN_TSC_FREQUENCY_KHZ, TDX_MAX_TSC_FREQUENCY_KHZ); 555 return -EINVAL; 556 } 557 558 if (env->tsc_khz % (25 * 1000)) { 559 error_setg(errp, "Invalid TSC %ld KHz, it must be multiple of 25MHz", 560 env->tsc_khz); 561 return -EINVAL; 562 } 563 564 /* it's safe even env->tsc_khz is 0. KVM uses host's tsc_khz in this case */ 565 r = kvm_vm_ioctl(kvm_state, KVM_SET_TSC_KHZ, env->tsc_khz); 566 if (r < 0) { 567 error_setg_errno(errp, -r, "Unable to set TSC frequency to %ld kHz", 568 env->tsc_khz); 569 return r; 570 } 571 572 if (tdx_guest->mrconfigid) { 573 g_autofree uint8_t *data = qbase64_decode(tdx_guest->mrconfigid, 574 strlen(tdx_guest->mrconfigid), &data_len, errp); 575 if (!data) { 576 return -1; 577 } 578 if (data_len != QCRYPTO_HASH_DIGEST_LEN_SHA384) { 579 error_setg(errp, "TDX: failed to decode mrconfigid"); 580 return -1; 581 } 582 memcpy(init_vm->mrconfigid, data, data_len); 583 } 584 585 if (tdx_guest->mrowner) { 586 g_autofree uint8_t *data = qbase64_decode(tdx_guest->mrowner, 587 strlen(tdx_guest->mrowner), &data_len, errp); 588 if (!data) { 589 return -1; 590 } 591 if (data_len != QCRYPTO_HASH_DIGEST_LEN_SHA384) { 592 error_setg(errp, "TDX: failed to decode mrowner"); 593 return -1; 594 } 595 memcpy(init_vm->mrowner, data, data_len); 596 } 597 598 if (tdx_guest->mrownerconfig) { 599 g_autofree uint8_t *data = qbase64_decode(tdx_guest->mrownerconfig, 600 strlen(tdx_guest->mrownerconfig), &data_len, errp); 601 if (!data) { 602 return -1; 603 } 604 if (data_len != QCRYPTO_HASH_DIGEST_LEN_SHA384) { 605 error_setg(errp, "TDX: failed to decode mrownerconfig"); 606 return -1; 607 } 608 memcpy(init_vm->mrownerconfig, data, data_len); 609 } 610 611 r = setup_td_guest_attributes(x86cpu, errp); 612 if (r) { 613 return r; 614 } 615 616 r = setup_td_xfam(x86cpu, errp); 617 if (r) { 618 return r; 619 } 620 621 init_vm->cpuid.nent = kvm_x86_build_cpuid(env, init_vm->cpuid.entries, 0); 622 tdx_filter_cpuid(&init_vm->cpuid); 623 624 init_vm->attributes = tdx_guest->attributes; 625 init_vm->xfam = tdx_guest->xfam; 626 627 /* 628 * KVM_TDX_INIT_VM gets -EAGAIN when KVM side SEAMCALL(TDH_MNG_CREATE) 629 * gets TDX_RND_NO_ENTROPY due to Random number generation (e.g., RDRAND or 630 * RDSEED) is busy. 631 * 632 * Retry for the case. 633 */ 634 do { 635 error_free(local_err); 636 local_err = NULL; 637 r = tdx_vm_ioctl(KVM_TDX_INIT_VM, 0, init_vm, &local_err); 638 } while (r == -EAGAIN && --retry); 639 640 if (r < 0) { 641 if (!retry) { 642 error_append_hint(&local_err, "Hardware RNG (Random Number " 643 "Generator) is busy occupied by someone (via RDRAND/RDSEED) " 644 "maliciously, which leads to KVM_TDX_INIT_VM keeping failure " 645 "due to lack of entropy.\n"); 646 } 647 error_propagate(errp, local_err); 648 return r; 649 } 650 651 tdx_guest->initialized = true; 652 653 return 0; 654 } 655 656 int tdx_parse_tdvf(void *flash_ptr, int size) 657 { 658 return tdvf_parse_metadata(&tdx_guest->tdvf, flash_ptr, size); 659 } 660 661 static void tdx_panicked_on_fatal_error(X86CPU *cpu, uint64_t error_code, 662 char *message, uint64_t gpa) 663 { 664 GuestPanicInformation *panic_info; 665 666 panic_info = g_new0(GuestPanicInformation, 1); 667 panic_info->type = GUEST_PANIC_INFORMATION_TYPE_TDX; 668 panic_info->u.tdx.error_code = (uint32_t) error_code; 669 panic_info->u.tdx.message = message; 670 panic_info->u.tdx.gpa = gpa; 671 672 qemu_system_guest_panicked(panic_info); 673 } 674 675 /* 676 * Only 8 registers can contain valid ASCII byte stream to form the fatal 677 * message, and their sequence is: R14, R15, RBX, RDI, RSI, R8, R9, RDX 678 */ 679 #define TDX_FATAL_MESSAGE_MAX 64 680 681 #define TDX_REPORT_FATAL_ERROR_GPA_VALID BIT_ULL(63) 682 683 int tdx_handle_report_fatal_error(X86CPU *cpu, struct kvm_run *run) 684 { 685 uint64_t error_code = run->system_event.data[R_R12]; 686 uint64_t reg_mask = run->system_event.data[R_ECX]; 687 char *message = NULL; 688 uint64_t *tmp; 689 uint64_t gpa = -1ull; 690 691 if (error_code & 0xffff) { 692 error_report("TDX: REPORT_FATAL_ERROR: invalid error code: 0x%lx", 693 error_code); 694 return -1; 695 } 696 697 if (reg_mask) { 698 message = g_malloc0(TDX_FATAL_MESSAGE_MAX + 1); 699 tmp = (uint64_t *)message; 700 701 #define COPY_REG(REG) \ 702 do { \ 703 if (reg_mask & BIT_ULL(REG)) { \ 704 *(tmp++) = run->system_event.data[REG]; \ 705 } \ 706 } while (0) 707 708 COPY_REG(R_R14); 709 COPY_REG(R_R15); 710 COPY_REG(R_EBX); 711 COPY_REG(R_EDI); 712 COPY_REG(R_ESI); 713 COPY_REG(R_R8); 714 COPY_REG(R_R9); 715 COPY_REG(R_EDX); 716 *((char *)tmp) = '\0'; 717 } 718 #undef COPY_REG 719 720 if (error_code & TDX_REPORT_FATAL_ERROR_GPA_VALID) { 721 gpa = run->system_event.data[R_R13]; 722 } 723 724 tdx_panicked_on_fatal_error(cpu, error_code, message, gpa); 725 726 return -1; 727 } 728 729 static bool tdx_guest_get_sept_ve_disable(Object *obj, Error **errp) 730 { 731 TdxGuest *tdx = TDX_GUEST(obj); 732 733 return !!(tdx->attributes & TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE); 734 } 735 736 static void tdx_guest_set_sept_ve_disable(Object *obj, bool value, Error **errp) 737 { 738 TdxGuest *tdx = TDX_GUEST(obj); 739 740 if (value) { 741 tdx->attributes |= TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE; 742 } else { 743 tdx->attributes &= ~TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE; 744 } 745 } 746 747 static char *tdx_guest_get_mrconfigid(Object *obj, Error **errp) 748 { 749 TdxGuest *tdx = TDX_GUEST(obj); 750 751 return g_strdup(tdx->mrconfigid); 752 } 753 754 static void tdx_guest_set_mrconfigid(Object *obj, const char *value, Error **errp) 755 { 756 TdxGuest *tdx = TDX_GUEST(obj); 757 758 g_free(tdx->mrconfigid); 759 tdx->mrconfigid = g_strdup(value); 760 } 761 762 static char *tdx_guest_get_mrowner(Object *obj, Error **errp) 763 { 764 TdxGuest *tdx = TDX_GUEST(obj); 765 766 return g_strdup(tdx->mrowner); 767 } 768 769 static void tdx_guest_set_mrowner(Object *obj, const char *value, Error **errp) 770 { 771 TdxGuest *tdx = TDX_GUEST(obj); 772 773 g_free(tdx->mrowner); 774 tdx->mrowner = g_strdup(value); 775 } 776 777 static char *tdx_guest_get_mrownerconfig(Object *obj, Error **errp) 778 { 779 TdxGuest *tdx = TDX_GUEST(obj); 780 781 return g_strdup(tdx->mrownerconfig); 782 } 783 784 static void tdx_guest_set_mrownerconfig(Object *obj, const char *value, Error **errp) 785 { 786 TdxGuest *tdx = TDX_GUEST(obj); 787 788 g_free(tdx->mrownerconfig); 789 tdx->mrownerconfig = g_strdup(value); 790 } 791 792 /* tdx guest */ 793 OBJECT_DEFINE_TYPE_WITH_INTERFACES(TdxGuest, 794 tdx_guest, 795 TDX_GUEST, 796 X86_CONFIDENTIAL_GUEST, 797 { TYPE_USER_CREATABLE }, 798 { NULL }) 799 800 static void tdx_guest_init(Object *obj) 801 { 802 ConfidentialGuestSupport *cgs = CONFIDENTIAL_GUEST_SUPPORT(obj); 803 TdxGuest *tdx = TDX_GUEST(obj); 804 805 qemu_mutex_init(&tdx->lock); 806 807 cgs->require_guest_memfd = true; 808 tdx->attributes = TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE; 809 810 object_property_add_uint64_ptr(obj, "attributes", &tdx->attributes, 811 OBJ_PROP_FLAG_READWRITE); 812 object_property_add_bool(obj, "sept-ve-disable", 813 tdx_guest_get_sept_ve_disable, 814 tdx_guest_set_sept_ve_disable); 815 object_property_add_str(obj, "mrconfigid", 816 tdx_guest_get_mrconfigid, 817 tdx_guest_set_mrconfigid); 818 object_property_add_str(obj, "mrowner", 819 tdx_guest_get_mrowner, tdx_guest_set_mrowner); 820 object_property_add_str(obj, "mrownerconfig", 821 tdx_guest_get_mrownerconfig, 822 tdx_guest_set_mrownerconfig); 823 } 824 825 static void tdx_guest_finalize(Object *obj) 826 { 827 } 828 829 static void tdx_guest_class_init(ObjectClass *oc, const void *data) 830 { 831 ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc); 832 X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc); 833 834 klass->kvm_init = tdx_kvm_init; 835 x86_klass->kvm_type = tdx_kvm_type; 836 x86_klass->cpu_instance_init = tdx_cpu_instance_init; 837 } 838