/*
 * QEMU PAPR Storage Class Memory Interfaces
 *
 * Copyright (c) 2019-2020, IBM Corporation.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#include "qemu/osdep.h"
#include "qemu/cutils.h"
#include "qapi/error.h"
#include "hw/ppc/spapr_drc.h"
#include "hw/ppc/spapr_nvdimm.h"
#include "hw/mem/nvdimm.h"
#include "qemu/nvdimm-utils.h"
#include "hw/ppc/fdt.h"
#include "qemu/range.h"
#include "hw/ppc/spapr_numa.h"
#include "block/thread-pool.h"
#include "migration/vmstate.h"
#include "qemu/pmem.h"
#include "hw/qdev-properties.h"

/* DIMM health bitmap indicators. Taken from the kernel's papr_scm.c */
/* SCM device is unable to persist memory contents */
#define PAPR_PMEM_UNARMED PPC_BIT(0)

/*
 * The NVDIMM size should be aligned to the SCM block size, and the SCM
 * block size should be aligned to SPAPR_MEMORY_BLOCK_SIZE so that SCM
 * regions do not overlap with DIMM memory regions. SCM devices can have
 * variable block sizes; for now, fix the block size to the minimum value.
 */
#define SPAPR_MINIMUM_SCM_BLOCK_SIZE SPAPR_MEMORY_BLOCK_SIZE

/* Have an explicit check for alignment */
QEMU_BUILD_BUG_ON(SPAPR_MINIMUM_SCM_BLOCK_SIZE % SPAPR_MEMORY_BLOCK_SIZE);

#define TYPE_SPAPR_NVDIMM "spapr-nvdimm"
OBJECT_DECLARE_TYPE(SpaprNVDIMMDevice, SPAPRNVDIMMClass, SPAPR_NVDIMM)

struct SPAPRNVDIMMClass {
    /* private */
    NVDIMMClass parent_class;

    /* public */
    void (*realize)(NVDIMMDevice *dimm, Error **errp);
    void (*unrealize)(NVDIMMDevice *dimm, Error **errp);
};

bool spapr_nvdimm_validate(HotplugHandler *hotplug_dev, NVDIMMDevice *nvdimm,
                           uint64_t size, Error **errp)
{
    const MachineClass *mc = MACHINE_GET_CLASS(hotplug_dev);
    const MachineState *ms = MACHINE(hotplug_dev);
    PCDIMMDevice *dimm = PC_DIMM(nvdimm);
    MemoryRegion *mr = host_memory_backend_get_memory(dimm->hostmem);
    g_autofree char *uuidstr = NULL;
    QemuUUID uuid;
    int ret;

    if (!mc->nvdimm_supported) {
        error_setg(errp, "NVDIMM hotplug not supported for this machine");
        return false;
    }

    if (!ms->nvdimms_state->is_enabled) {
        error_setg(errp, "nvdimm device found but 'nvdimm=off' was set");
        return false;
    }

    if (object_property_get_int(OBJECT(nvdimm), NVDIMM_LABEL_SIZE_PROP,
                                &error_abort) == 0) {
        error_setg(errp, "PAPR requires NVDIMM devices to have label-size set");
        return false;
    }

    if (size % SPAPR_MINIMUM_SCM_BLOCK_SIZE) {
        error_setg(errp, "PAPR requires NVDIMM memory size (excluding label)"
                   " to be a multiple of %" PRIu64 "MB",
                   SPAPR_MINIMUM_SCM_BLOCK_SIZE / MiB);
        return false;
    }

    uuidstr = object_property_get_str(OBJECT(nvdimm), NVDIMM_UUID_PROP,
                                      &error_abort);
    ret = qemu_uuid_parse(uuidstr, &uuid);
    g_assert(!ret);

    if (qemu_uuid_is_null(&uuid)) {
        error_setg(errp, "NVDIMM device requires the uuid to be set");
        return false;
    }

    if (object_dynamic_cast(OBJECT(nvdimm), TYPE_SPAPR_NVDIMM) &&
        (memory_region_get_fd(mr) < 0)) {
        error_setg(errp, "spapr-nvdimm device requires the "
                   "memdev %s to be of memory-backend-file type",
                   object_get_canonical_path_component(OBJECT(dimm->hostmem)));
        return false;
    }

    return true;
}

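/*
 * Attach the NVDIMM to the PMEM DRC for the given slot and, for a device
 * hotplugged at runtime, notify the guest of the addition.
 */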
void spapr_add_nvdimm(DeviceState *dev, uint64_t slot)
{
    SpaprDrc *drc;
    bool hotplugged = spapr_drc_hotplugged(dev);

    drc = spapr_drc_by_id(TYPE_SPAPR_DRC_PMEM, slot);
    g_assert(drc);

    /*
     * pc_dimm_get_free_slot() provided a free slot at pre-plug. The
     * corresponding DRC is thus assumed to be attachable.
     */
    spapr_drc_attach(drc, dev);

    if (hotplugged) {
        spapr_hotplug_req_add_by_index(drc);
    }
}

static int spapr_dt_nvdimm(SpaprMachineState *spapr, void *fdt,
                           int parent_offset, NVDIMMDevice *nvdimm)
{
    int child_offset;
    char *buf;
    SpaprDrc *drc;
    uint32_t drc_idx;
    uint32_t node = object_property_get_uint(OBJECT(nvdimm), PC_DIMM_NODE_PROP,
                                             &error_abort);
    uint64_t slot = object_property_get_uint(OBJECT(nvdimm), PC_DIMM_SLOT_PROP,
                                             &error_abort);
    uint64_t lsize = nvdimm->label_size;
    uint64_t size = object_property_get_int(OBJECT(nvdimm), PC_DIMM_SIZE_PROP,
                                            NULL);

    drc = spapr_drc_by_id(TYPE_SPAPR_DRC_PMEM, slot);
    g_assert(drc);

    drc_idx = spapr_drc_index(drc);

    buf = g_strdup_printf("ibm,pmemory@%x", drc_idx);
    child_offset = fdt_add_subnode(fdt, parent_offset, buf);
    g_free(buf);

    _FDT(child_offset);

    _FDT((fdt_setprop_cell(fdt, child_offset, "reg", drc_idx)));
    _FDT((fdt_setprop_string(fdt, child_offset, "compatible", "ibm,pmemory")));
    _FDT((fdt_setprop_string(fdt, child_offset, "device_type", "ibm,pmemory")));

    spapr_numa_write_associativity_dt(spapr, fdt, child_offset, node);

    buf = qemu_uuid_unparse_strdup(&nvdimm->uuid);
    _FDT((fdt_setprop_string(fdt, child_offset, "ibm,unit-guid", buf)));
    g_free(buf);

    _FDT((fdt_setprop_cell(fdt, child_offset, "ibm,my-drc-index", drc_idx)));

    _FDT((fdt_setprop_u64(fdt, child_offset, "ibm,block-size",
                          SPAPR_MINIMUM_SCM_BLOCK_SIZE)));
    _FDT((fdt_setprop_u64(fdt, child_offset, "ibm,number-of-blocks",
                          size / SPAPR_MINIMUM_SCM_BLOCK_SIZE)));
    _FDT((fdt_setprop_cell(fdt, child_offset, "ibm,metadata-size", lsize)));

    _FDT((fdt_setprop_string(fdt, child_offset, "ibm,pmem-application",
                             "operating-system")));
    _FDT(fdt_setprop(fdt, child_offset, "ibm,cache-flush-required", NULL, 0));

    if (object_dynamic_cast(OBJECT(nvdimm), TYPE_SPAPR_NVDIMM)) {
        bool is_pmem = false, pmem_override = false;
        PCDIMMDevice *dimm = PC_DIMM(nvdimm);
        HostMemoryBackend *hostmem = dimm->hostmem;

        is_pmem = object_property_get_bool(OBJECT(hostmem), "pmem", NULL);
        pmem_override = object_property_get_bool(OBJECT(nvdimm),
                                                 "pmem-override", NULL);
        if (!is_pmem || pmem_override) {
            _FDT(fdt_setprop(fdt, child_offset, "ibm,hcall-flush-required",
                             NULL, 0));
        }
    }

    return child_offset;
}

int spapr_pmem_dt_populate(SpaprDrc *drc, SpaprMachineState *spapr,
                           void *fdt, int *fdt_start_offset, Error **errp)
{
    NVDIMMDevice *nvdimm = NVDIMM(drc->dev);

    *fdt_start_offset = spapr_dt_nvdimm(spapr, fdt, 0, nvdimm);

    return 0;
}

void spapr_dt_persistent_memory(SpaprMachineState *spapr, void *fdt)
{
    int offset = fdt_subnode_offset(fdt, 0, "ibm,persistent-memory");
    GSList *iter, *nvdimms = nvdimm_get_device_list();

    if (offset < 0) {
        offset = fdt_add_subnode(fdt, 0, "ibm,persistent-memory");
        _FDT(offset);
        _FDT((fdt_setprop_cell(fdt, offset, "#address-cells", 0x1)));
        _FDT((fdt_setprop_cell(fdt, offset, "#size-cells", 0x0)));
        _FDT((fdt_setprop_string(fdt, offset, "device_type",
                                 "ibm,persistent-memory")));
    }

    /* Create DT entries for cold plugged NVDIMM devices */
    for (iter = nvdimms; iter; iter = iter->next) {
        NVDIMMDevice *nvdimm = iter->data;

        spapr_dt_nvdimm(spapr, fdt, offset, nvdimm);
    }
    g_slist_free(nvdimms);
}

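/*
 * H_SCM_READ_METADATA
 * Input: drc_index, byte offset into the label area, len (1, 2, 4 or 8)
 * Out: the requested label bytes, zero-extended into a single register
 * Return Value: H_SUCCESS, H_PARAMETER, H_P2, H_P3
 *
 * Reads len bytes from the NVDIMM label area at the given offset;
 * multi-byte values are returned big-endian.
 */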
static target_ulong h_scm_read_metadata(PowerPCCPU *cpu,
                                        SpaprMachineState *spapr,
                                        target_ulong opcode,
                                        target_ulong *args)
{
    uint32_t drc_index = args[0];
    uint64_t offset = args[1];
    uint64_t len = args[2];
    SpaprDrc *drc = spapr_drc_by_index(drc_index);
    NVDIMMDevice *nvdimm;
    NVDIMMClass *ddc;
    uint64_t data = 0;
    uint8_t buf[8] = { 0 };

    if (!drc || !drc->dev ||
        spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PMEM) {
        return H_PARAMETER;
    }

    if (len != 1 && len != 2 &&
        len != 4 && len != 8) {
        return H_P3;
    }

    nvdimm = NVDIMM(drc->dev);
    if ((offset + len < offset) ||
        (nvdimm->label_size < len + offset)) {
        return H_P2;
    }

    ddc = NVDIMM_GET_CLASS(nvdimm);
    ddc->read_label_data(nvdimm, buf, len, offset);

    switch (len) {
    case 1:
        data = ldub_p(buf);
        break;
    case 2:
        data = lduw_be_p(buf);
        break;
    case 4:
        data = ldl_be_p(buf);
        break;
    case 8:
        data = ldq_be_p(buf);
        break;
    default:
        g_assert_not_reached();
    }

    args[0] = data;

    return H_SUCCESS;
}

static target_ulong h_scm_write_metadata(PowerPCCPU *cpu,
                                         SpaprMachineState *spapr,
                                         target_ulong opcode,
                                         target_ulong *args)
{
    uint32_t drc_index = args[0];
    uint64_t offset = args[1];
    uint64_t data = args[2];
    uint64_t len = args[3];
    SpaprDrc *drc = spapr_drc_by_index(drc_index);
    NVDIMMDevice *nvdimm;
    NVDIMMClass *ddc;
    uint8_t buf[8] = { 0 };

    if (!drc || !drc->dev ||
        spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PMEM) {
        return H_PARAMETER;
    }

    if (len != 1 && len != 2 &&
        len != 4 && len != 8) {
        return H_P4;
    }

    nvdimm = NVDIMM(drc->dev);
    if ((offset + len < offset) ||
        (nvdimm->label_size < len + offset) ||
        nvdimm->readonly) {
        return H_P2;
    }

    switch (len) {
    case 1:
        if (data & 0xffffffffffffff00) {
            return H_P2;
        }
        stb_p(buf, data);
        break;
    case 2:
        if (data & 0xffffffffffff0000) {
            return H_P2;
        }
        stw_be_p(buf, data);
        break;
    case 4:
        if (data & 0xffffffff00000000) {
            return H_P2;
        }
        stl_be_p(buf, data);
        break;
    case 8:
        stq_be_p(buf, data);
        break;
    default:
        g_assert_not_reached();
    }

    ddc = NVDIMM_GET_CLASS(nvdimm);
    ddc->write_label_data(nvdimm, buf, len, offset);

    return H_SUCCESS;
}

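/*
 * H_SCM_BIND_MEM
 * Input: drc_index, starting SCM block index, number of SCM blocks to bind,
 *        target logical address, continue-token
 * Out: target logical address in args[1], number of blocks bound in args[2]
 * Return Value: H_SUCCESS, H_PARAMETER, H_P2, H_P3, H_P5, H_OVERLAP
 *
 * The backing memory is already mapped at device plug time, so this hcall
 * only validates the arguments and reports the guest physical address of
 * the requested blocks. The guest must pass -1 as the target logical
 * address since QEMU assigns the address itself.
 */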
static target_ulong h_scm_bind_mem(PowerPCCPU *cpu, SpaprMachineState *spapr,
                                   target_ulong opcode, target_ulong *args)
{
    uint32_t drc_index = args[0];
    uint64_t starting_idx = args[1];
    uint64_t no_of_scm_blocks_to_bind = args[2];
    uint64_t target_logical_mem_addr = args[3];
    uint64_t continue_token = args[4];
    uint64_t size;
    uint64_t total_no_of_scm_blocks;
    SpaprDrc *drc = spapr_drc_by_index(drc_index);
    hwaddr addr;
    NVDIMMDevice *nvdimm;

    if (!drc || !drc->dev ||
        spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PMEM) {
        return H_PARAMETER;
    }

    /*
     * The continue-token should be zero: QEMU has already bound
     * everything and this hcall doesn't return H_BUSY.
     */
    if (continue_token > 0) {
        return H_P5;
    }

    /* Currently QEMU assigns the address. */
    if (target_logical_mem_addr != 0xffffffffffffffff) {
        return H_OVERLAP;
    }

    nvdimm = NVDIMM(drc->dev);

    size = object_property_get_uint(OBJECT(nvdimm),
                                    PC_DIMM_SIZE_PROP, &error_abort);

    total_no_of_scm_blocks = size / SPAPR_MINIMUM_SCM_BLOCK_SIZE;

    if (starting_idx > total_no_of_scm_blocks) {
        return H_P2;
    }

    if (((starting_idx + no_of_scm_blocks_to_bind) < starting_idx) ||
        ((starting_idx + no_of_scm_blocks_to_bind) > total_no_of_scm_blocks)) {
        return H_P3;
    }

    addr = object_property_get_uint(OBJECT(nvdimm),
                                    PC_DIMM_ADDR_PROP, &error_abort);

    addr += starting_idx * SPAPR_MINIMUM_SCM_BLOCK_SIZE;

    /* Already bound, return the target logical address in R5 */
    args[1] = addr;
    args[2] = no_of_scm_blocks_to_bind;

    return H_SUCCESS;
}

typedef struct SpaprNVDIMMDeviceFlushState {
    uint64_t continue_token;
    int64_t hcall_ret;
    uint32_t drcidx;

    QLIST_ENTRY(SpaprNVDIMMDeviceFlushState) node;
} SpaprNVDIMMDeviceFlushState;

typedef struct SpaprNVDIMMDevice SpaprNVDIMMDevice;
struct SpaprNVDIMMDevice {
    /* private */
    NVDIMMDevice parent_obj;

    bool hcall_flush_required;
    uint64_t nvdimm_flush_token;
    QLIST_HEAD(, SpaprNVDIMMDeviceFlushState) pending_nvdimm_flush_states;
    QLIST_HEAD(, SpaprNVDIMMDeviceFlushState) completed_nvdimm_flush_states;

    /* public */

    /*
     * When 'on', this property forces QEMU to enable the hcall flush
     * for the NVDIMM device even if the backend is a pmem.
     */
    bool pmem_override;
};

static int flush_worker_cb(void *opaque)
{
    SpaprNVDIMMDeviceFlushState *state = opaque;
    SpaprDrc *drc = spapr_drc_by_index(state->drcidx);
    PCDIMMDevice *dimm;
    HostMemoryBackend *backend;
    int backend_fd;

    g_assert(drc != NULL);

    dimm = PC_DIMM(drc->dev);
    backend = MEMORY_BACKEND(dimm->hostmem);
    backend_fd = memory_region_get_fd(&backend->mr);

    if (object_property_get_bool(OBJECT(backend), "pmem", NULL)) {
        MemoryRegion *mr = host_memory_backend_get_memory(dimm->hostmem);
        void *ptr = memory_region_get_ram_ptr(mr);
        size_t size = object_property_get_uint(OBJECT(dimm), PC_DIMM_SIZE_PROP,
                                               NULL);

        /* flush pmem backend */
        pmem_persist(ptr, size);
    } else {
        /* flush raw backing image */
        if (qemu_fdatasync(backend_fd) < 0) {
            error_report("papr_scm: Could not sync nvdimm to backend file: %s",
                         strerror(errno));
            return H_HARDWARE;
        }
    }

    return H_SUCCESS;
}

static void spapr_nvdimm_flush_completion_cb(void *opaque, int hcall_ret)
{
    SpaprNVDIMMDeviceFlushState *state = opaque;
    SpaprDrc *drc = spapr_drc_by_index(state->drcidx);
    SpaprNVDIMMDevice *s_nvdimm;

    g_assert(drc != NULL);

    s_nvdimm = SPAPR_NVDIMM(drc->dev);

    state->hcall_ret = hcall_ret;
    QLIST_REMOVE(state, node);
    QLIST_INSERT_HEAD(&s_nvdimm->completed_nvdimm_flush_states, state, node);
}

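/*
 * Post-load handler for the migrated flush state: verify that source and
 * destination agree on whether hcall-based flushes are required, then
 * resubmit any flush requests that were still pending on the source.
 */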
static int spapr_nvdimm_flush_post_load(void *opaque, int version_id)
{
    SpaprNVDIMMDevice *s_nvdimm = (SpaprNVDIMMDevice *)opaque;
    SpaprNVDIMMDeviceFlushState *state;
    HostMemoryBackend *backend = MEMORY_BACKEND(PC_DIMM(s_nvdimm)->hostmem);
    bool is_pmem = object_property_get_bool(OBJECT(backend), "pmem", NULL);
    bool pmem_override = object_property_get_bool(OBJECT(s_nvdimm),
                                                  "pmem-override", NULL);
    bool dest_hcall_flush_required = pmem_override || !is_pmem;

    if (!s_nvdimm->hcall_flush_required && dest_hcall_flush_required) {
        error_report("The file backend for the spapr-nvdimm device %s at "
                     "source is a pmem, use pmem=on and pmem-override=off to "
                     "continue.", DEVICE(s_nvdimm)->id);
        return -EINVAL;
    }
    if (s_nvdimm->hcall_flush_required && !dest_hcall_flush_required) {
        error_report("The guest expects hcall-flush support for the "
                     "spapr-nvdimm device %s, use pmem-override=on to "
                     "continue.", DEVICE(s_nvdimm)->id);
        return -EINVAL;
    }

    QLIST_FOREACH(state, &s_nvdimm->pending_nvdimm_flush_states, node) {
        thread_pool_submit_aio(flush_worker_cb, state,
                               spapr_nvdimm_flush_completion_cb, state);
    }

    return 0;
}

static const VMStateDescription vmstate_spapr_nvdimm_flush_state = {
    .name = "spapr_nvdimm_flush_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (const VMStateField[]) {
        VMSTATE_UINT64(continue_token, SpaprNVDIMMDeviceFlushState),
        VMSTATE_INT64(hcall_ret, SpaprNVDIMMDeviceFlushState),
        VMSTATE_UINT32(drcidx, SpaprNVDIMMDeviceFlushState),
        VMSTATE_END_OF_LIST()
    },
};

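/*
 * Per-device migration state: the flush token counter and both flush-state
 * lists are migrated, and spapr_nvdimm_flush_post_load() resubmits any
 * flushes that were still pending on the source.
 */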
const VMStateDescription vmstate_spapr_nvdimm_states = {
    .name = "spapr_nvdimm_states",
    .version_id = 1,
    .minimum_version_id = 1,
    .post_load = spapr_nvdimm_flush_post_load,
    .fields = (const VMStateField[]) {
        VMSTATE_BOOL(hcall_flush_required, SpaprNVDIMMDevice),
        VMSTATE_UINT64(nvdimm_flush_token, SpaprNVDIMMDevice),
        VMSTATE_QLIST_V(completed_nvdimm_flush_states, SpaprNVDIMMDevice, 1,
                        vmstate_spapr_nvdimm_flush_state,
                        SpaprNVDIMMDeviceFlushState, node),
        VMSTATE_QLIST_V(pending_nvdimm_flush_states, SpaprNVDIMMDevice, 1,
                        vmstate_spapr_nvdimm_flush_state,
                        SpaprNVDIMMDeviceFlushState, node),
        VMSTATE_END_OF_LIST()
    },
};

/*
 * Assign a token and reserve it for the new flush state.
 */
static SpaprNVDIMMDeviceFlushState *spapr_nvdimm_init_new_flush_state(
                                                SpaprNVDIMMDevice *spapr_nvdimm)
{
    SpaprNVDIMMDeviceFlushState *state;

    state = g_malloc0(sizeof(*state));

    spapr_nvdimm->nvdimm_flush_token++;
    /* Token zero means no job pending. Assert on overflow to zero */
    g_assert(spapr_nvdimm->nvdimm_flush_token != 0);

    state->continue_token = spapr_nvdimm->nvdimm_flush_token;

    QLIST_INSERT_HEAD(&spapr_nvdimm->pending_nvdimm_flush_states, state, node);

    return state;
}

/*
 * spapr_nvdimm_finish_flushes
 * Waits for all pending flush requests to complete and frees their state.
 */
void spapr_nvdimm_finish_flushes(void)
{
    SpaprNVDIMMDeviceFlushState *state, *next;
    GSList *list, *nvdimms;

    /*
     * Called on the reset path: the main loop thread, which runs the
     * pending completion BHs, is executing the reset itself, so poll the
     * AIO context here until all pending flushes complete. The other
     * caller is h_client_architecture_support, at early guest boot.
     */
    nvdimms = nvdimm_get_device_list();
    for (list = nvdimms; list; list = list->next) {
        NVDIMMDevice *nvdimm = list->data;
        if (object_dynamic_cast(OBJECT(nvdimm), TYPE_SPAPR_NVDIMM)) {
            SpaprNVDIMMDevice *s_nvdimm = SPAPR_NVDIMM(nvdimm);
            while (!QLIST_EMPTY(&s_nvdimm->pending_nvdimm_flush_states)) {
                aio_poll(qemu_get_aio_context(), true);
            }

            QLIST_FOREACH_SAFE(state, &s_nvdimm->completed_nvdimm_flush_states,
                               node, next) {
                QLIST_REMOVE(state, node);
                g_free(state);
            }
        }
    }
    g_slist_free(nvdimms);
}

/*
 * spapr_nvdimm_get_flush_status
 * Fetches the status of the hcall worker and returns
 * H_LONG_BUSY_ORDER_10_MSEC if the worker is still running.
 */
static int spapr_nvdimm_get_flush_status(SpaprNVDIMMDevice *s_nvdimm,
                                         uint64_t token)
{
    SpaprNVDIMMDeviceFlushState *state, *node;

    QLIST_FOREACH(state, &s_nvdimm->pending_nvdimm_flush_states, node) {
        if (state->continue_token == token) {
            return H_LONG_BUSY_ORDER_10_MSEC;
        }
    }

    QLIST_FOREACH_SAFE(state, &s_nvdimm->completed_nvdimm_flush_states,
                       node, node) {
        if (state->continue_token == token) {
            int ret = state->hcall_ret;
            QLIST_REMOVE(state, node);
            g_free(state);
            return ret;
        }
    }

    /* Not found in the completed list either: invalid token */
    return H_P2;
}

/*
 * H_SCM_FLUSH
 * Input: drc_index, continue-token
 * Out: continue-token
 * Return Value: H_SUCCESS, H_PARAMETER, H_P2, H_LONG_BUSY_ORDER_10_MSEC,
 *               H_UNSUPPORTED
 *
 * Given a DRC index, flush the data to the backend NVDIMM device. The hcall
 * returns H_LONG_BUSY_ORDER_10_MSEC when the flush takes longer, and then
 * needs to be issued multiple times in order to be completely serviced. The
 * continue-token from the output is to be passed in the argument list of
 * subsequent hcalls until the hcall is completely serviced, at which point
 * H_SUCCESS or another error is returned.
 */
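/*
 * A guest driver is therefore expected to retry with the returned
 * continue-token until the hcall stops reporting long-busy. A minimal
 * sketch of that loop, assuming the Linux guest's plpar_hcall() wrapper
 * and H_IS_LONG_BUSY() check (names shown for illustration only):
 *
 *     unsigned long ret_buf[PLPAR_HCALL_BUFSIZE];
 *     uint64_t token = 0;
 *     int64_t rc;
 *
 *     do {
 *         rc = plpar_hcall(H_SCM_FLUSH, ret_buf, drc_index, token);
 *         token = ret_buf[0];
 *     } while (H_IS_LONG_BUSY(rc));
 */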
static target_ulong h_scm_flush(PowerPCCPU *cpu, SpaprMachineState *spapr,
                                target_ulong opcode, target_ulong *args)
{
    int ret;
    uint32_t drc_index = args[0];
    uint64_t continue_token = args[1];
    SpaprDrc *drc = spapr_drc_by_index(drc_index);
    PCDIMMDevice *dimm;
    HostMemoryBackend *backend = NULL;
    SpaprNVDIMMDeviceFlushState *state;
    int fd;

    if (!drc || !drc->dev ||
        spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PMEM) {
        return H_PARAMETER;
    }

    dimm = PC_DIMM(drc->dev);
    if (!object_dynamic_cast(OBJECT(dimm), TYPE_SPAPR_NVDIMM)) {
        return H_PARAMETER;
    }

    if (continue_token == 0) {
        bool is_pmem = false, pmem_override = false;
        backend = MEMORY_BACKEND(dimm->hostmem);
        fd = memory_region_get_fd(&backend->mr);

        if (fd < 0) {
            return H_UNSUPPORTED;
        }

        is_pmem = object_property_get_bool(OBJECT(backend), "pmem", NULL);
        pmem_override = object_property_get_bool(OBJECT(dimm),
                                                 "pmem-override", NULL);
        if (is_pmem && !pmem_override) {
            return H_UNSUPPORTED;
        }

        state = spapr_nvdimm_init_new_flush_state(SPAPR_NVDIMM(dimm));
        if (!state) {
            return H_HARDWARE;
        }

        state->drcidx = drc_index;

        thread_pool_submit_aio(flush_worker_cb, state,
                               spapr_nvdimm_flush_completion_cb, state);

        continue_token = state->continue_token;
    }

    ret = spapr_nvdimm_get_flush_status(SPAPR_NVDIMM(dimm), continue_token);
    if (H_IS_LONG_BUSY(ret)) {
        args[0] = continue_token;
    }

    return ret;
}

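/*
 * H_SCM_UNBIND_MEM
 * Input: drc_index, starting SCM logical address, number of SCM blocks to
 *        unbind, continue-token
 * Out: number of SCM blocks unbound
 * Return Value: H_SUCCESS, H_PARAMETER, H_P2, H_P3, H_P4
 *
 * Only validates that the requested range lies within the NVDIMM; the
 * actual unbind happens at unplug time.
 */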
static target_ulong h_scm_unbind_mem(PowerPCCPU *cpu, SpaprMachineState *spapr,
                                     target_ulong opcode, target_ulong *args)
{
    uint32_t drc_index = args[0];
    uint64_t starting_scm_logical_addr = args[1];
    uint64_t no_of_scm_blocks_to_unbind = args[2];
    uint64_t continue_token = args[3];
    uint64_t size_to_unbind;
    Range blockrange = range_empty;
    Range nvdimmrange = range_empty;
    SpaprDrc *drc = spapr_drc_by_index(drc_index);
    NVDIMMDevice *nvdimm;
    uint64_t size, addr;

    if (!drc || !drc->dev ||
        spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PMEM) {
        return H_PARAMETER;
    }

    /* continue_token should be zero as this hcall doesn't return H_BUSY. */
    if (continue_token > 0) {
        return H_P4;
    }

    /* Check if starting_scm_logical_addr is block aligned */
    if (!QEMU_IS_ALIGNED(starting_scm_logical_addr,
                         SPAPR_MINIMUM_SCM_BLOCK_SIZE)) {
        return H_P2;
    }

    size_to_unbind = no_of_scm_blocks_to_unbind * SPAPR_MINIMUM_SCM_BLOCK_SIZE;
    if (no_of_scm_blocks_to_unbind == 0 || no_of_scm_blocks_to_unbind !=
        size_to_unbind / SPAPR_MINIMUM_SCM_BLOCK_SIZE) {
        return H_P3;
    }

    nvdimm = NVDIMM(drc->dev);
    size = object_property_get_int(OBJECT(nvdimm), PC_DIMM_SIZE_PROP,
                                   &error_abort);
    addr = object_property_get_int(OBJECT(nvdimm), PC_DIMM_ADDR_PROP,
                                   &error_abort);

    range_init_nofail(&nvdimmrange, addr, size);
    range_init_nofail(&blockrange, starting_scm_logical_addr, size_to_unbind);

    if (!range_contains_range(&nvdimmrange, &blockrange)) {
        return H_P3;
    }

    args[1] = no_of_scm_blocks_to_unbind;

    /* let unplug take care of actual unbind */
    return H_SUCCESS;
}

#define H_UNBIND_SCOPE_ALL 0x1
#define H_UNBIND_SCOPE_DRC 0x2

static target_ulong h_scm_unbind_all(PowerPCCPU *cpu, SpaprMachineState *spapr,
                                     target_ulong opcode, target_ulong *args)
{
    uint64_t target_scope = args[0];
    uint32_t drc_index = args[1];
    uint64_t continue_token = args[2];
    NVDIMMDevice *nvdimm;
    uint64_t size;
    uint64_t no_of_scm_blocks_unbound = 0;

    /* continue_token should be zero as this hcall doesn't return H_BUSY. */
    if (continue_token > 0) {
        return H_P4;
    }

    if (target_scope == H_UNBIND_SCOPE_DRC) {
        SpaprDrc *drc = spapr_drc_by_index(drc_index);

        if (!drc || !drc->dev ||
            spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PMEM) {
            return H_P2;
        }

        nvdimm = NVDIMM(drc->dev);
        size = object_property_get_int(OBJECT(nvdimm), PC_DIMM_SIZE_PROP,
                                       &error_abort);

        no_of_scm_blocks_unbound = size / SPAPR_MINIMUM_SCM_BLOCK_SIZE;
    } else if (target_scope == H_UNBIND_SCOPE_ALL) {
        GSList *list, *nvdimms;

        nvdimms = nvdimm_get_device_list();
        for (list = nvdimms; list; list = list->next) {
            nvdimm = list->data;
            size = object_property_get_int(OBJECT(nvdimm), PC_DIMM_SIZE_PROP,
                                           &error_abort);

            no_of_scm_blocks_unbound += size / SPAPR_MINIMUM_SCM_BLOCK_SIZE;
        }
        g_slist_free(nvdimms);
    } else {
        return H_PARAMETER;
    }

    args[1] = no_of_scm_blocks_unbound;

    /* let unplug take care of actual unbind */
    return H_SUCCESS;
}

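/*
 * H_SCM_HEALTH
 * Input: drc_index
 * Out: health bitmap, health bitmap mask
 * Return Value: H_SUCCESS, H_PARAMETER
 *
 * Reports the health of the NVDIMM; only the PAPR_PMEM_UNARMED bit is
 * currently implemented, reflecting the device's 'unarmed' property.
 */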
static target_ulong h_scm_health(PowerPCCPU *cpu, SpaprMachineState *spapr,
                                 target_ulong opcode, target_ulong *args)
{
    NVDIMMDevice *nvdimm;
    uint64_t hbitmap = 0;
    uint32_t drc_index = args[0];
    SpaprDrc *drc = spapr_drc_by_index(drc_index);
    const uint64_t hbitmap_mask = PAPR_PMEM_UNARMED;

    /* Ensure the DRC is valid, is a PMEM DIMM and is plugged in */
    if (!drc || !drc->dev ||
        spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PMEM) {
        return H_PARAMETER;
    }

    nvdimm = NVDIMM(drc->dev);

    /* Check if the nvdimm is unarmed and report it via the health bitmap */
    if (object_property_get_bool(OBJECT(nvdimm), NVDIMM_UNARMED_PROP, NULL)) {
        hbitmap |= PAPR_PMEM_UNARMED;
    }

    /* Update the out args with the health bitmap and mask */
    args[0] = hbitmap;
    args[1] = hbitmap_mask;

    return H_SUCCESS;
}

static void spapr_scm_register_types(void)
{
    /* qemu/scm specific hcalls */
    spapr_register_hypercall(H_SCM_READ_METADATA, h_scm_read_metadata);
    spapr_register_hypercall(H_SCM_WRITE_METADATA, h_scm_write_metadata);
    spapr_register_hypercall(H_SCM_BIND_MEM, h_scm_bind_mem);
    spapr_register_hypercall(H_SCM_UNBIND_MEM, h_scm_unbind_mem);
    spapr_register_hypercall(H_SCM_UNBIND_ALL, h_scm_unbind_all);
    spapr_register_hypercall(H_SCM_HEALTH, h_scm_health);
    spapr_register_hypercall(H_SCM_FLUSH, h_scm_flush);
}

type_init(spapr_scm_register_types)

static void spapr_nvdimm_realize(NVDIMMDevice *dimm, Error **errp)
{
    SpaprNVDIMMDevice *s_nvdimm = SPAPR_NVDIMM(dimm);
    HostMemoryBackend *backend = MEMORY_BACKEND(PC_DIMM(dimm)->hostmem);
    bool is_pmem = object_property_get_bool(OBJECT(backend), "pmem", NULL);
    bool pmem_override = object_property_get_bool(OBJECT(dimm), "pmem-override",
                                                  NULL);

    if (!is_pmem || pmem_override) {
        s_nvdimm->hcall_flush_required = true;
    }

    vmstate_register_any(NULL, &vmstate_spapr_nvdimm_states, dimm);
}

static void spapr_nvdimm_unrealize(NVDIMMDevice *dimm)
{
    vmstate_unregister(NULL, &vmstate_spapr_nvdimm_states, dimm);
}

#ifdef CONFIG_LIBPMEM
static const Property spapr_nvdimm_properties[] = {
    DEFINE_PROP_BOOL("pmem-override", SpaprNVDIMMDevice, pmem_override, false),
};
#endif

static void spapr_nvdimm_class_init(ObjectClass *oc, const void *data)
{
    NVDIMMClass *nvc = NVDIMM_CLASS(oc);

    nvc->realize = spapr_nvdimm_realize;
    nvc->unrealize = spapr_nvdimm_unrealize;

#ifdef CONFIG_LIBPMEM
    device_class_set_props(DEVICE_CLASS(oc), spapr_nvdimm_properties);
#endif
}

static void spapr_nvdimm_init(Object *obj)
{
    SpaprNVDIMMDevice *s_nvdimm = SPAPR_NVDIMM(obj);

    s_nvdimm->hcall_flush_required = false;
    QLIST_INIT(&s_nvdimm->pending_nvdimm_flush_states);
    QLIST_INIT(&s_nvdimm->completed_nvdimm_flush_states);
}

static const TypeInfo spapr_nvdimm_info = {
    .name = TYPE_SPAPR_NVDIMM,
    .parent = TYPE_NVDIMM,
    .class_init = spapr_nvdimm_class_init,
    .class_size = sizeof(SPAPRNVDIMMClass),
    .instance_size = sizeof(SpaprNVDIMMDevice),
    .instance_init = spapr_nvdimm_init,
};

static void spapr_nvdimm_register_types(void)
{
    type_register_static(&spapr_nvdimm_info);
}

type_init(spapr_nvdimm_register_types)
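
/*
 * Example invocation (a sketch only; the path, sizes and uuid below are
 * illustrative). The backing file must hold the label area in addition to
 * the guest-visible region, which itself must be a multiple of the SCM
 * block size, e.g. 1 GiB + 128 KiB label = 1073872896 bytes:
 *
 *   qemu-system-ppc64 -machine pseries,nvdimm=on ... \
 *     -object memory-backend-file,id=memnvdimm0,share=on,mem-path=/path/to/nvdimm0,size=1073872896 \
 *     -device spapr-nvdimm,memdev=memnvdimm0,label-size=128K,uuid=72511b67-0b3b-42fd-8d1d-5be3cae8bcaa
 */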