/*
 * QEMU PAPR Storage Class Memory Interfaces
 *
 * Copyright (c) 2019-2020, IBM Corporation.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
24 #include "qemu/osdep.h"
25 #include "qemu/cutils.h"
26 #include "qapi/error.h"
27 #include "hw/ppc/spapr_drc.h"
28 #include "hw/ppc/spapr_nvdimm.h"
29 #include "hw/mem/nvdimm.h"
30 #include "qemu/nvdimm-utils.h"
31 #include "hw/ppc/fdt.h"
32 #include "qemu/range.h"
33 #include "hw/ppc/spapr_numa.h"
34 #include "block/thread-pool.h"
35 #include "migration/vmstate.h"
36 #include "qemu/pmem.h"
37 #include "hw/qdev-properties.h"
38
/* DIMM health bitmap indicators. Taken from kernel's papr_scm.c */
/* SCM device is unable to persist memory contents */
#define PAPR_PMEM_UNARMED PPC_BIT(0)

/*
 * The nvdimm size should be aligned to SCM block size.
 * The SCM block size should be aligned to SPAPR_MEMORY_BLOCK_SIZE
 * in order to have SCM regions not to overlap with dimm memory regions.
 * The SCM devices can have variable block sizes. For now, fixing the
 * block size to the minimum value.
 */
#define SPAPR_MINIMUM_SCM_BLOCK_SIZE SPAPR_MEMORY_BLOCK_SIZE

/* Have an explicit check for alignment */
QEMU_BUILD_BUG_ON(SPAPR_MINIMUM_SCM_BLOCK_SIZE % SPAPR_MEMORY_BLOCK_SIZE);

/* QOM type for the spapr-specific NVDIMM subclass defined below */
#define TYPE_SPAPR_NVDIMM "spapr-nvdimm"
OBJECT_DECLARE_TYPE(SpaprNVDIMMDevice, SPAPRNVDIMMClass, SPAPR_NVDIMM)
57
struct SPAPRNVDIMMClass {
    /* private */
    NVDIMMClass parent_class;

    /* public */
    /*
     * NOTE(review): these hooks are not invoked anywhere in this file;
     * spapr_nvdimm_class_init() overrides the NVDIMMClass callbacks
     * directly.  Confirm whether they are used elsewhere before relying
     * on them.
     */
    void (*realize)(NVDIMMDevice *dimm, Error **errp);
    void (*unrealize)(NVDIMMDevice *dimm, Error **errp);
};
66
/*
 * Pre-plug validation of an NVDIMM device against PAPR requirements.
 *
 * @hotplug_dev: the machine acting as hotplug handler
 * @nvdimm: the device being (hot)plugged
 * @size: the device memory size, excluding the label area
 * @errp: set on failure with a user-facing reason
 *
 * Returns true if the device may be plugged, false otherwise.
 */
bool spapr_nvdimm_validate(HotplugHandler *hotplug_dev, NVDIMMDevice *nvdimm,
                           uint64_t size, Error **errp)
{
    const MachineClass *mc = MACHINE_GET_CLASS(hotplug_dev);
    const MachineState *ms = MACHINE(hotplug_dev);
    PCDIMMDevice *dimm = PC_DIMM(nvdimm);
    MemoryRegion *mr = host_memory_backend_get_memory(dimm->hostmem);
    g_autofree char *uuidstr = NULL;
    QemuUUID uuid;
    int ret;

    if (!mc->nvdimm_supported) {
        error_setg(errp, "NVDIMM hotplug not supported for this machine");
        return false;
    }

    if (!ms->nvdimms_state->is_enabled) {
        error_setg(errp, "nvdimm device found but 'nvdimm=off' was set");
        return false;
    }

    /* PAPR needs a label area: a zero label-size property is rejected */
    if (object_property_get_int(OBJECT(nvdimm), NVDIMM_LABEL_SIZE_PROP,
                                &error_abort) == 0) {
        error_setg(errp, "PAPR requires NVDIMM devices to have label-size set");
        return false;
    }

    /* The usable region must be a whole number of SCM blocks */
    if (size % SPAPR_MINIMUM_SCM_BLOCK_SIZE) {
        error_setg(errp, "PAPR requires NVDIMM memory size (excluding label)"
                   " to be a multiple of %" PRIu64 "MB",
                   SPAPR_MINIMUM_SCM_BLOCK_SIZE / MiB);
        return false;
    }

    /*
     * The uuid property is validated at set time, so parsing here can
     * only fail on a programming error -- hence the assertion.
     */
    uuidstr = object_property_get_str(OBJECT(nvdimm), NVDIMM_UUID_PROP,
                                      &error_abort);
    ret = qemu_uuid_parse(uuidstr, &uuid);
    g_assert(!ret);

    if (qemu_uuid_is_null(&uuid)) {
        error_setg(errp, "NVDIMM device requires the uuid to be set");
        return false;
    }

    /*
     * The spapr-nvdimm flush hcall needs an fd to fsync, so the backend
     * must be file-based for that subclass.
     */
    if (object_dynamic_cast(OBJECT(nvdimm), TYPE_SPAPR_NVDIMM) &&
        (memory_region_get_fd(mr) < 0)) {
        error_setg(errp, "spapr-nvdimm device requires the "
                   "memdev %s to be of memory-backend-file type",
                   object_get_canonical_path_component(OBJECT(dimm->hostmem)));
        return false;
    }

    return true;
}
121
122
/*
 * Attach a (hot)plugged NVDIMM to its PMEM DRC and, on hotplug, notify
 * the guest of the new resource.
 */
void spapr_add_nvdimm(DeviceState *dev, uint64_t slot)
{
    const bool notify_guest = spapr_drc_hotplugged(dev);
    SpaprDrc *drc = spapr_drc_by_id(TYPE_SPAPR_DRC_PMEM, slot);

    g_assert(drc);

    /*
     * pc_dimm_get_free_slot() provided a free slot at pre-plug. The
     * corresponding DRC is thus assumed to be attachable.
     */
    spapr_drc_attach(drc, dev);

    if (notify_guest) {
        spapr_hotplug_req_add_by_index(drc);
    }
}
141
/*
 * Build the "ibm,pmemory@<drc-index>" device tree node describing one
 * NVDIMM under @parent_offset, and return the new node's offset.
 */
static int spapr_dt_nvdimm(SpaprMachineState *spapr, void *fdt,
                           int parent_offset, NVDIMMDevice *nvdimm)
{
    int child_offset;
    char *buf;
    SpaprDrc *drc;
    uint32_t drc_idx;
    uint32_t node = object_property_get_uint(OBJECT(nvdimm), PC_DIMM_NODE_PROP,
                                             &error_abort);
    uint64_t slot = object_property_get_uint(OBJECT(nvdimm), PC_DIMM_SLOT_PROP,
                                             &error_abort);
    uint64_t lsize = nvdimm->label_size;
    uint64_t size = object_property_get_int(OBJECT(nvdimm), PC_DIMM_SIZE_PROP,
                                            NULL);

    drc = spapr_drc_by_id(TYPE_SPAPR_DRC_PMEM, slot);
    g_assert(drc);

    drc_idx = spapr_drc_index(drc);

    /* Node name and unit address are derived from the DRC index */
    buf = g_strdup_printf("ibm,pmemory@%x", drc_idx);
    child_offset = fdt_add_subnode(fdt, parent_offset, buf);
    g_free(buf);

    _FDT(child_offset);

    _FDT((fdt_setprop_cell(fdt, child_offset, "reg", drc_idx)));
    _FDT((fdt_setprop_string(fdt, child_offset, "compatible", "ibm,pmemory")));
    _FDT((fdt_setprop_string(fdt, child_offset, "device_type", "ibm,pmemory")));

    /* NUMA associativity for the node the DIMM was assigned to */
    spapr_numa_write_associativity_dt(spapr, fdt, child_offset, node);

    buf = qemu_uuid_unparse_strdup(&nvdimm->uuid);
    _FDT((fdt_setprop_string(fdt, child_offset, "ibm,unit-guid", buf)));
    g_free(buf);

    _FDT((fdt_setprop_cell(fdt, child_offset, "ibm,my-drc-index", drc_idx)));

    /* Geometry: fixed block size, and the label area size ("metadata") */
    _FDT((fdt_setprop_u64(fdt, child_offset, "ibm,block-size",
                          SPAPR_MINIMUM_SCM_BLOCK_SIZE)));
    _FDT((fdt_setprop_u64(fdt, child_offset, "ibm,number-of-blocks",
                          size / SPAPR_MINIMUM_SCM_BLOCK_SIZE)));
    _FDT((fdt_setprop_cell(fdt, child_offset, "ibm,metadata-size", lsize)));

    _FDT((fdt_setprop_string(fdt, child_offset, "ibm,pmem-application",
                             "operating-system")));
    _FDT(fdt_setprop(fdt, child_offset, "ibm,cache-flush-required", NULL, 0));

    if (object_dynamic_cast(OBJECT(nvdimm), TYPE_SPAPR_NVDIMM)) {
        bool is_pmem = false, pmem_override = false;
        PCDIMMDevice *dimm = PC_DIMM(nvdimm);
        HostMemoryBackend *hostmem = dimm->hostmem;

        /*
         * Advertise H_SCM_FLUSH support when the backend is not real
         * pmem, or when pmem-override forces the hcall path anyway.
         */
        is_pmem = object_property_get_bool(OBJECT(hostmem), "pmem", NULL);
        pmem_override = object_property_get_bool(OBJECT(nvdimm),
                                                 "pmem-override", NULL);
        if (!is_pmem || pmem_override) {
            _FDT(fdt_setprop(fdt, child_offset, "ibm,hcall-flush-required",
                             NULL, 0));
        }
    }

    return child_offset;
}
206
spapr_pmem_dt_populate(SpaprDrc * drc,SpaprMachineState * spapr,void * fdt,int * fdt_start_offset,Error ** errp)207 int spapr_pmem_dt_populate(SpaprDrc *drc, SpaprMachineState *spapr,
208 void *fdt, int *fdt_start_offset, Error **errp)
209 {
210 NVDIMMDevice *nvdimm = NVDIMM(drc->dev);
211
212 *fdt_start_offset = spapr_dt_nvdimm(spapr, fdt, 0, nvdimm);
213
214 return 0;
215 }
216
/*
 * Populate /ibm,persistent-memory with one child node per cold-plugged
 * NVDIMM, creating the parent node first if it does not exist yet.
 */
void spapr_dt_persistent_memory(SpaprMachineState *spapr, void *fdt)
{
    int offset = fdt_subnode_offset(fdt, 0, "ibm,persistent-memory");
    GSList *l, *nvdimms = nvdimm_get_device_list();

    if (offset < 0) {
        /* First caller: create the parent node with its fixed properties */
        offset = fdt_add_subnode(fdt, 0, "ibm,persistent-memory");
        _FDT(offset);
        _FDT((fdt_setprop_cell(fdt, offset, "#address-cells", 0x1)));
        _FDT((fdt_setprop_cell(fdt, offset, "#size-cells", 0x0)));
        _FDT((fdt_setprop_string(fdt, offset, "device_type",
                                 "ibm,persistent-memory")));
    }

    for (l = nvdimms; l != NULL; l = l->next) {
        NVDIMMDevice *nvdimm = l->data;

        spapr_dt_nvdimm(spapr, fdt, offset, nvdimm);
    }

    g_slist_free(nvdimms);
}
239
/*
 * H_SCM_READ_METADATA
 * Input: drc_index, offset into the label area, length (1/2/4/8 bytes)
 * Out: args[0] = the value read
 *
 * Reads a naturally-sized quantity from the NVDIMM label ("metadata")
 * area.
 */
static target_ulong h_scm_read_metadata(PowerPCCPU *cpu,
                                        SpaprMachineState *spapr,
                                        target_ulong opcode,
                                        target_ulong *args)
{
    uint32_t drc_index = args[0];
    uint64_t offset = args[1];
    uint64_t len = args[2];
    SpaprDrc *drc = spapr_drc_by_index(drc_index);
    NVDIMMDevice *nvdimm;
    NVDIMMClass *ddc;
    uint64_t data = 0;
    uint8_t buf[8] = { 0 };

    /* DRC must exist, have a device plugged, and be a PMEM connector */
    if (!drc || !drc->dev ||
        spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PMEM) {
        return H_PARAMETER;
    }

    /* Only naturally-aligned power-of-two access sizes are allowed */
    if (len != 1 && len != 2 &&
        len != 4 && len != 8) {
        return H_P3;
    }

    nvdimm = NVDIMM(drc->dev);
    /* Reject offset+len wrap-around and accesses beyond the label area */
    if ((offset + len < offset) ||
        (nvdimm->label_size < len + offset)) {
        return H_P2;
    }

    ddc = NVDIMM_GET_CLASS(nvdimm);
    ddc->read_label_data(nvdimm, buf, len, offset);

    /* Label data is big-endian on the wire; convert to host order */
    switch (len) {
    case 1:
        data = ldub_p(buf);
        break;
    case 2:
        data = lduw_be_p(buf);
        break;
    case 4:
        data = ldl_be_p(buf);
        break;
    case 8:
        data = ldq_be_p(buf);
        break;
    default:
        g_assert_not_reached();
    }

    args[0] = data;

    return H_SUCCESS;
}
294
/*
 * H_SCM_WRITE_METADATA
 * Input: drc_index, offset into the label area, data, length (1/2/4/8)
 *
 * Writes a naturally-sized quantity to the NVDIMM label ("metadata")
 * area.  Fails with H_P2 for out-of-range offsets, oversized data, or a
 * read-only device.
 */
static target_ulong h_scm_write_metadata(PowerPCCPU *cpu,
                                         SpaprMachineState *spapr,
                                         target_ulong opcode,
                                         target_ulong *args)
{
    uint32_t drc_index = args[0];
    uint64_t offset = args[1];
    uint64_t data = args[2];
    uint64_t len = args[3];
    SpaprDrc *drc = spapr_drc_by_index(drc_index);
    NVDIMMDevice *nvdimm;
    NVDIMMClass *ddc;
    uint8_t buf[8] = { 0 };

    /* DRC must exist, have a device plugged, and be a PMEM connector */
    if (!drc || !drc->dev ||
        spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PMEM) {
        return H_PARAMETER;
    }

    /* Only naturally-aligned power-of-two access sizes are allowed */
    if (len != 1 && len != 2 &&
        len != 4 && len != 8) {
        return H_P4;
    }

    nvdimm = NVDIMM(drc->dev);
    /* Reject wrap-around, out-of-bounds writes, and read-only devices */
    if ((offset + len < offset) ||
        (nvdimm->label_size < len + offset) ||
        nvdimm->readonly) {
        return H_P2;
    }

    /*
     * Store big-endian into the staging buffer; reject data wider than
     * the requested access size.
     */
    switch (len) {
    case 1:
        if (data & 0xffffffffffffff00) {
            return H_P2;
        }
        stb_p(buf, data);
        break;
    case 2:
        if (data & 0xffffffffffff0000) {
            return H_P2;
        }
        stw_be_p(buf, data);
        break;
    case 4:
        if (data & 0xffffffff00000000) {
            return H_P2;
        }
        stl_be_p(buf, data);
        break;
    case 8:
        stq_be_p(buf, data);
        break;
    default:
        g_assert_not_reached();
    }

    ddc = NVDIMM_GET_CLASS(nvdimm);
    ddc->write_label_data(nvdimm, buf, len, offset);

    return H_SUCCESS;
}
357
/*
 * H_SCM_BIND_MEM
 * Input: drc_index, starting block index, number of blocks to bind,
 *        target logical address (must be -1: QEMU chooses), continue-token
 * Out: args[1] = bound logical address, args[2] = blocks bound
 *
 * QEMU pre-binds the whole device at plug time, so this hcall only
 * validates the request and reports the address of the requested range.
 */
static target_ulong h_scm_bind_mem(PowerPCCPU *cpu, SpaprMachineState *spapr,
                                   target_ulong opcode, target_ulong *args)
{
    uint32_t drc_index = args[0];
    uint64_t starting_idx = args[1];
    uint64_t no_of_scm_blocks_to_bind = args[2];
    uint64_t target_logical_mem_addr = args[3];
    uint64_t continue_token = args[4];
    uint64_t size;
    uint64_t total_no_of_scm_blocks;
    SpaprDrc *drc = spapr_drc_by_index(drc_index);
    hwaddr addr;
    NVDIMMDevice *nvdimm;

    if (!drc || !drc->dev ||
        spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PMEM) {
        return H_PARAMETER;
    }

    /*
     * The continue-token must be zero: QEMU has already bound everything
     * up front and this hcall never returns H_BUSY.
     */
    if (continue_token > 0) {
        return H_P5;
    }

    /* Currently qemu assigns the address. */
    if (target_logical_mem_addr != 0xffffffffffffffff) {
        return H_OVERLAP;
    }

    nvdimm = NVDIMM(drc->dev);

    size = object_property_get_uint(OBJECT(nvdimm),
                                    PC_DIMM_SIZE_PROP, &error_abort);

    total_no_of_scm_blocks = size / SPAPR_MINIMUM_SCM_BLOCK_SIZE;

    if (starting_idx > total_no_of_scm_blocks) {
        return H_P2;
    }

    /* Reject index overflow and ranges running past the device end */
    if (((starting_idx + no_of_scm_blocks_to_bind) < starting_idx) ||
        ((starting_idx + no_of_scm_blocks_to_bind) > total_no_of_scm_blocks)) {
        return H_P3;
    }

    addr = object_property_get_uint(OBJECT(nvdimm),
                                    PC_DIMM_ADDR_PROP, &error_abort);

    addr += starting_idx * SPAPR_MINIMUM_SCM_BLOCK_SIZE;

    /* Already bound, Return target logical address in R5 */
    args[1] = addr;
    args[2] = no_of_scm_blocks_to_bind;

    return H_SUCCESS;
}
417
/*
 * Per-request state for an asynchronous H_SCM_FLUSH.  A state lives on
 * the device's pending list while the worker runs, then moves to the
 * completed list until the guest collects the result.
 */
typedef struct SpaprNVDIMMDeviceFlushState {
    uint64_t continue_token;    /* token the guest polls with; never 0 */
    int64_t hcall_ret;          /* worker result, valid once completed */
    uint32_t drcidx;            /* DRC index of the device being flushed */

    QLIST_ENTRY(SpaprNVDIMMDeviceFlushState) node;
} SpaprNVDIMMDeviceFlushState;
425
typedef struct SpaprNVDIMMDevice SpaprNVDIMMDevice;
struct SpaprNVDIMMDevice {
    /* private */
    NVDIMMDevice parent_obj;

    /* set at realize when the backend needs the H_SCM_FLUSH hcall */
    bool hcall_flush_required;
    /* last token handed out; tokens are allocated by incrementing this */
    uint64_t nvdimm_flush_token;
    QLIST_HEAD(, SpaprNVDIMMDeviceFlushState) pending_nvdimm_flush_states;
    QLIST_HEAD(, SpaprNVDIMMDeviceFlushState) completed_nvdimm_flush_states;

    /* public */

    /*
     * Setting this property to 'on' forces QEMU to enable the hcall
     * flush for the nvdimm device even if the backend is a pmem.
     */
    bool pmem_override;
};
444
flush_worker_cb(void * opaque)445 static int flush_worker_cb(void *opaque)
446 {
447 SpaprNVDIMMDeviceFlushState *state = opaque;
448 SpaprDrc *drc = spapr_drc_by_index(state->drcidx);
449 PCDIMMDevice *dimm;
450 HostMemoryBackend *backend;
451 int backend_fd;
452
453 g_assert(drc != NULL);
454
455 dimm = PC_DIMM(drc->dev);
456 backend = MEMORY_BACKEND(dimm->hostmem);
457 backend_fd = memory_region_get_fd(&backend->mr);
458
459 if (object_property_get_bool(OBJECT(backend), "pmem", NULL)) {
460 MemoryRegion *mr = host_memory_backend_get_memory(dimm->hostmem);
461 void *ptr = memory_region_get_ram_ptr(mr);
462 size_t size = object_property_get_uint(OBJECT(dimm), PC_DIMM_SIZE_PROP,
463 NULL);
464
465 /* flush pmem backend */
466 pmem_persist(ptr, size);
467 } else {
468 /* flush raw backing image */
469 if (qemu_fdatasync(backend_fd) < 0) {
470 error_report("papr_scm: Could not sync nvdimm to backend file: %s",
471 strerror(errno));
472 return H_HARDWARE;
473 }
474 }
475
476 return H_SUCCESS;
477 }
478
spapr_nvdimm_flush_completion_cb(void * opaque,int hcall_ret)479 static void spapr_nvdimm_flush_completion_cb(void *opaque, int hcall_ret)
480 {
481 SpaprNVDIMMDeviceFlushState *state = opaque;
482 SpaprDrc *drc = spapr_drc_by_index(state->drcidx);
483 SpaprNVDIMMDevice *s_nvdimm;
484
485 g_assert(drc != NULL);
486
487 s_nvdimm = SPAPR_NVDIMM(drc->dev);
488
489 state->hcall_ret = hcall_ret;
490 QLIST_REMOVE(state, node);
491 QLIST_INSERT_HEAD(&s_nvdimm->completed_nvdimm_flush_states, state, node);
492 }
493
spapr_nvdimm_flush_post_load(void * opaque,int version_id)494 static int spapr_nvdimm_flush_post_load(void *opaque, int version_id)
495 {
496 SpaprNVDIMMDevice *s_nvdimm = (SpaprNVDIMMDevice *)opaque;
497 SpaprNVDIMMDeviceFlushState *state;
498 HostMemoryBackend *backend = MEMORY_BACKEND(PC_DIMM(s_nvdimm)->hostmem);
499 bool is_pmem = object_property_get_bool(OBJECT(backend), "pmem", NULL);
500 bool pmem_override = object_property_get_bool(OBJECT(s_nvdimm),
501 "pmem-override", NULL);
502 bool dest_hcall_flush_required = pmem_override || !is_pmem;
503
504 if (!s_nvdimm->hcall_flush_required && dest_hcall_flush_required) {
505 error_report("The file backend for the spapr-nvdimm device %s at "
506 "source is a pmem, use pmem=on and pmem-override=off to "
507 "continue.", DEVICE(s_nvdimm)->id);
508 return -EINVAL;
509 }
510 if (s_nvdimm->hcall_flush_required && !dest_hcall_flush_required) {
511 error_report("The guest expects hcall-flush support for the "
512 "spapr-nvdimm device %s, use pmem_override=on to "
513 "continue.", DEVICE(s_nvdimm)->id);
514 return -EINVAL;
515 }
516
517 QLIST_FOREACH(state, &s_nvdimm->pending_nvdimm_flush_states, node) {
518 thread_pool_submit_aio(flush_worker_cb, state,
519 spapr_nvdimm_flush_completion_cb, state);
520 }
521
522 return 0;
523 }
524
/* Migration description of one in-flight/completed flush request */
static const VMStateDescription vmstate_spapr_nvdimm_flush_state = {
     .name = "spapr_nvdimm_flush_state",
     .version_id = 1,
     .minimum_version_id = 1,
     .fields = (const VMStateField[]) {
         VMSTATE_UINT64(continue_token, SpaprNVDIMMDeviceFlushState),
         VMSTATE_INT64(hcall_ret, SpaprNVDIMMDeviceFlushState),
         VMSTATE_UINT32(drcidx, SpaprNVDIMMDeviceFlushState),
         VMSTATE_END_OF_LIST()
     },
};
536
/*
 * Migration description for the spapr-nvdimm flush machinery: the flush
 * token counter plus both flush-state lists.  post_load re-validates the
 * backend and resubmits pending flushes on the destination.
 */
const VMStateDescription vmstate_spapr_nvdimm_states = {
    .name = "spapr_nvdimm_states",
    .version_id = 1,
    .minimum_version_id = 1,
    .post_load = spapr_nvdimm_flush_post_load,
    .fields = (const VMStateField[]) {
        VMSTATE_BOOL(hcall_flush_required, SpaprNVDIMMDevice),
        VMSTATE_UINT64(nvdimm_flush_token, SpaprNVDIMMDevice),
        VMSTATE_QLIST_V(completed_nvdimm_flush_states, SpaprNVDIMMDevice, 1,
                        vmstate_spapr_nvdimm_flush_state,
                        SpaprNVDIMMDeviceFlushState, node),
        VMSTATE_QLIST_V(pending_nvdimm_flush_states, SpaprNVDIMMDevice, 1,
                        vmstate_spapr_nvdimm_flush_state,
                        SpaprNVDIMMDeviceFlushState, node),
        VMSTATE_END_OF_LIST()
    },
};
554
555 /*
556 * Assign a token and reserve it for the new flush state.
557 */
spapr_nvdimm_init_new_flush_state(SpaprNVDIMMDevice * spapr_nvdimm)558 static SpaprNVDIMMDeviceFlushState *spapr_nvdimm_init_new_flush_state(
559 SpaprNVDIMMDevice *spapr_nvdimm)
560 {
561 SpaprNVDIMMDeviceFlushState *state;
562
563 state = g_malloc0(sizeof(*state));
564
565 spapr_nvdimm->nvdimm_flush_token++;
566 /* Token zero is presumed as no job pending. Assert on overflow to zero */
567 g_assert(spapr_nvdimm->nvdimm_flush_token != 0);
568
569 state->continue_token = spapr_nvdimm->nvdimm_flush_token;
570
571 QLIST_INSERT_HEAD(&spapr_nvdimm->pending_nvdimm_flush_states, state, node);
572
573 return state;
574 }
575
/*
 * spapr_nvdimm_finish_flushes
 * Waits for all pending flush requests to complete
 * their execution and free the states
 */
void spapr_nvdimm_finish_flushes(void)
{
    SpaprNVDIMMDeviceFlushState *state, *next;
    GSList *list, *nvdimms;

    /*
     * Called on reset path, the main loop thread which calls
     * the pending BHs has gotten out running in the reset path,
     * finally reaching here. Other code path being guest
     * h_client_architecture_support, that's early boot up.
     */
    nvdimms = nvdimm_get_device_list();
    for (list = nvdimms; list; list = list->next) {
        NVDIMMDevice *nvdimm = list->data;
        if (object_dynamic_cast(OBJECT(nvdimm), TYPE_SPAPR_NVDIMM)) {
            SpaprNVDIMMDevice *s_nvdimm = SPAPR_NVDIMM(nvdimm);
            /* Drive the event loop until every pending flush completes */
            while (!QLIST_EMPTY(&s_nvdimm->pending_nvdimm_flush_states)) {
                aio_poll(qemu_get_aio_context(), true);
            }

            /* The guest will never collect these results; drop them */
            QLIST_FOREACH_SAFE(state, &s_nvdimm->completed_nvdimm_flush_states,
                               node, next) {
                QLIST_REMOVE(state, node);
                g_free(state);
            }
        }
    }
    g_slist_free(nvdimms);
}
610
611 /*
612 * spapr_nvdimm_get_flush_status
613 * Fetches the status of the hcall worker and returns
614 * H_LONG_BUSY_ORDER_10_MSEC if the worker is still running.
615 */
spapr_nvdimm_get_flush_status(SpaprNVDIMMDevice * s_nvdimm,uint64_t token)616 static int spapr_nvdimm_get_flush_status(SpaprNVDIMMDevice *s_nvdimm,
617 uint64_t token)
618 {
619 SpaprNVDIMMDeviceFlushState *state, *node;
620
621 QLIST_FOREACH(state, &s_nvdimm->pending_nvdimm_flush_states, node) {
622 if (state->continue_token == token) {
623 return H_LONG_BUSY_ORDER_10_MSEC;
624 }
625 }
626
627 QLIST_FOREACH_SAFE(state, &s_nvdimm->completed_nvdimm_flush_states,
628 node, node) {
629 if (state->continue_token == token) {
630 int ret = state->hcall_ret;
631 QLIST_REMOVE(state, node);
632 g_free(state);
633 return ret;
634 }
635 }
636
637 /* If not found in complete list too, invalid token */
638 return H_P2;
639 }
640
/*
 * H_SCM_FLUSH
 * Input: drc_index, continue-token
 * Out: continue-token
 * Return Value: H_SUCCESS, H_Parameter, H_P2, H_LONG_BUSY_ORDER_10_MSEC,
 *               H_UNSUPPORTED
 *
 * Given a DRC Index Flush the data to backend NVDIMM device. The hcall returns
 * H_LONG_BUSY_ORDER_10_MSEC when the flush takes longer time and the hcall
 * needs to be issued multiple times in order to be completely serviced. The
 * continue-token from the output to be passed in the argument list of
 * subsequent hcalls until the hcall is completely serviced at which point
 * H_SUCCESS or other error is returned.
 */
static target_ulong h_scm_flush(PowerPCCPU *cpu, SpaprMachineState *spapr,
                                target_ulong opcode, target_ulong *args)
{
    int ret;
    uint32_t drc_index = args[0];
    uint64_t continue_token = args[1];
    SpaprDrc *drc = spapr_drc_by_index(drc_index);
    PCDIMMDevice *dimm;
    HostMemoryBackend *backend = NULL;
    SpaprNVDIMMDeviceFlushState *state;
    int fd;

    if (!drc || !drc->dev ||
        spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PMEM) {
        return H_PARAMETER;
    }

    /* Only the spapr-nvdimm subclass supports the flush hcall */
    dimm = PC_DIMM(drc->dev);
    if (!object_dynamic_cast(OBJECT(dimm), TYPE_SPAPR_NVDIMM)) {
        return H_PARAMETER;
    }
    /* Token zero starts a new flush; non-zero polls an existing one */
    if (continue_token == 0) {
        bool is_pmem = false, pmem_override = false;
        backend = MEMORY_BACKEND(dimm->hostmem);
        fd = memory_region_get_fd(&backend->mr);

        if (fd < 0) {
            return H_UNSUPPORTED;
        }

        /* A real pmem backend needs no hcall flush unless overridden */
        is_pmem = object_property_get_bool(OBJECT(backend), "pmem", NULL);
        pmem_override = object_property_get_bool(OBJECT(dimm),
                                                "pmem-override", NULL);
        if (is_pmem && !pmem_override) {
            return H_UNSUPPORTED;
        }

        state = spapr_nvdimm_init_new_flush_state(SPAPR_NVDIMM(dimm));
        if (!state) {
            return H_HARDWARE;
        }

        state->drcidx = drc_index;

        thread_pool_submit_aio(flush_worker_cb, state,
                               spapr_nvdimm_flush_completion_cb, state);

        continue_token = state->continue_token;
    }

    ret = spapr_nvdimm_get_flush_status(SPAPR_NVDIMM(dimm), continue_token);
    if (H_IS_LONG_BUSY(ret)) {
        /* Still busy: hand the token back so the guest can poll again */
        args[0] = continue_token;
    }

    return ret;
}
712
/*
 * H_SCM_UNBIND_MEM
 * Input: drc_index, starting SCM logical address, number of blocks to
 *        unbind, continue-token (must be 0)
 * Out: args[1] = number of blocks unbound
 *
 * Validates that the requested range lies within the bound device; the
 * actual unbind happens at unplug time.
 */
static target_ulong h_scm_unbind_mem(PowerPCCPU *cpu, SpaprMachineState *spapr,
                                     target_ulong opcode, target_ulong *args)
{
    uint32_t drc_index = args[0];
    uint64_t starting_scm_logical_addr = args[1];
    uint64_t no_of_scm_blocks_to_unbind = args[2];
    uint64_t continue_token = args[3];
    uint64_t size_to_unbind;
    Range blockrange = range_empty;
    Range nvdimmrange = range_empty;
    SpaprDrc *drc = spapr_drc_by_index(drc_index);
    NVDIMMDevice *nvdimm;
    uint64_t size, addr;

    if (!drc || !drc->dev ||
        spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PMEM) {
        return H_PARAMETER;
    }

    /* continue_token should be zero as this hcall doesn't return H_BUSY. */
    if (continue_token > 0) {
        return H_P4;
    }

    /* Check if starting_scm_logical_addr is block aligned */
    if (!QEMU_IS_ALIGNED(starting_scm_logical_addr,
                         SPAPR_MINIMUM_SCM_BLOCK_SIZE)) {
        return H_P2;
    }

    /* Reject zero blocks and multiplication overflow in the byte count */
    size_to_unbind = no_of_scm_blocks_to_unbind * SPAPR_MINIMUM_SCM_BLOCK_SIZE;
    if (no_of_scm_blocks_to_unbind == 0 || no_of_scm_blocks_to_unbind !=
                               size_to_unbind / SPAPR_MINIMUM_SCM_BLOCK_SIZE) {
        return H_P3;
    }

    nvdimm = NVDIMM(drc->dev);
    size = object_property_get_int(OBJECT(nvdimm), PC_DIMM_SIZE_PROP,
                                   &error_abort);
    addr = object_property_get_int(OBJECT(nvdimm), PC_DIMM_ADDR_PROP,
                                   &error_abort);

    /* The requested range must fall entirely inside the device range */
    range_init_nofail(&nvdimmrange, addr, size);
    range_init_nofail(&blockrange, starting_scm_logical_addr, size_to_unbind);

    if (!range_contains_range(&nvdimmrange, &blockrange)) {
        return H_P3;
    }

    args[1] = no_of_scm_blocks_to_unbind;

    /* let unplug take care of actual unbind */
    return H_SUCCESS;
}
767
/* Scope argument values for H_SCM_UNBIND_ALL */
#define H_UNBIND_SCOPE_ALL 0x1
#define H_UNBIND_SCOPE_DRC 0x2

/*
 * H_SCM_UNBIND_ALL
 * Input: target scope (ALL devices or one DRC), drc_index (for DRC
 *        scope), continue-token (must be 0)
 * Out: args[1] = number of blocks unbound
 *
 * Reports the block count that would be unbound; the actual unbind is
 * handled at unplug time.
 */
static target_ulong h_scm_unbind_all(PowerPCCPU *cpu, SpaprMachineState *spapr,
                                     target_ulong opcode, target_ulong *args)
{
    uint64_t target_scope = args[0];
    uint32_t drc_index = args[1];
    uint64_t continue_token = args[2];
    NVDIMMDevice *nvdimm;
    uint64_t size;
    uint64_t no_of_scm_blocks_unbound = 0;

    /* continue_token should be zero as this hcall doesn't return H_BUSY. */
    if (continue_token > 0) {
        return H_P4;
    }

    if (target_scope == H_UNBIND_SCOPE_DRC) {
        /* Single device: count its blocks */
        SpaprDrc *drc = spapr_drc_by_index(drc_index);

        if (!drc || !drc->dev ||
            spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PMEM) {
            return H_P2;
        }

        nvdimm = NVDIMM(drc->dev);
        size = object_property_get_int(OBJECT(nvdimm), PC_DIMM_SIZE_PROP,
                                      &error_abort);

        no_of_scm_blocks_unbound = size / SPAPR_MINIMUM_SCM_BLOCK_SIZE;
    } else if (target_scope == H_UNBIND_SCOPE_ALL) {
        /* All devices: sum the blocks of every NVDIMM */
        GSList *list, *nvdimms;

        nvdimms = nvdimm_get_device_list();
        for (list = nvdimms; list; list = list->next) {
            nvdimm = list->data;
            size = object_property_get_int(OBJECT(nvdimm), PC_DIMM_SIZE_PROP,
                                          &error_abort);

            no_of_scm_blocks_unbound += size / SPAPR_MINIMUM_SCM_BLOCK_SIZE;
        }
        g_slist_free(nvdimms);
    } else {
        return H_PARAMETER;
    }

    args[1] = no_of_scm_blocks_unbound;

    /* let unplug take care of actual unbind */
    return H_SUCCESS;
}
820
/*
 * H_SCM_HEALTH
 * Input: drc_index
 * Out: args[0] = health bitmap, args[1] = mask of valid bits
 *
 * Reports the device health; only the "unarmed" indicator is modelled.
 */
static target_ulong h_scm_health(PowerPCCPU *cpu, SpaprMachineState *spapr,
                                 target_ulong opcode, target_ulong *args)
{
    uint32_t drc_index = args[0];
    SpaprDrc *drc = spapr_drc_by_index(drc_index);
    NVDIMMDevice *nvdimm;
    uint64_t health = 0;

    /* Ensure that the drc is valid & is valid PMEM dimm and is plugged in */
    if (!drc || !drc->dev ||
        spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PMEM) {
        return H_PARAMETER;
    }

    nvdimm = NVDIMM(drc->dev);

    /* Report an unarmed device through the health bitmap */
    if (object_property_get_bool(OBJECT(nvdimm), NVDIMM_UNARMED_PROP, NULL)) {
        health |= PAPR_PMEM_UNARMED;
    }

    args[0] = health;
    args[1] = PAPR_PMEM_UNARMED;    /* mask: only this bit is modelled */

    return H_SUCCESS;
}
851
/* Register all PAPR SCM hypercall handlers */
static void spapr_scm_register_types(void)
{
    /* qemu/scm specific hcalls */
    spapr_register_hypercall(H_SCM_READ_METADATA, h_scm_read_metadata);
    spapr_register_hypercall(H_SCM_WRITE_METADATA, h_scm_write_metadata);
    spapr_register_hypercall(H_SCM_BIND_MEM, h_scm_bind_mem);
    spapr_register_hypercall(H_SCM_UNBIND_MEM, h_scm_unbind_mem);
    spapr_register_hypercall(H_SCM_UNBIND_ALL, h_scm_unbind_all);
    spapr_register_hypercall(H_SCM_HEALTH, h_scm_health);
    spapr_register_hypercall(H_SCM_FLUSH, h_scm_flush);
}

type_init(spapr_scm_register_types)
865
866 static void spapr_nvdimm_realize(NVDIMMDevice *dimm, Error **errp)
867 {
868 SpaprNVDIMMDevice *s_nvdimm = SPAPR_NVDIMM(dimm);
869 HostMemoryBackend *backend = MEMORY_BACKEND(PC_DIMM(dimm)->hostmem);
870 bool is_pmem = object_property_get_bool(OBJECT(backend), "pmem", NULL);
871 bool pmem_override = object_property_get_bool(OBJECT(dimm), "pmem-override",
872 NULL);
873 if (!is_pmem || pmem_override) {
874 s_nvdimm->hcall_flush_required = true;
875 }
876
877 vmstate_register_any(NULL, &vmstate_spapr_nvdimm_states, dimm);
878 }
879
/* Unrealize hook: drop the migration state registered at realize */
static void spapr_nvdimm_unrealize(NVDIMMDevice *dimm)
{
    vmstate_unregister(NULL, &vmstate_spapr_nvdimm_states, dimm);
}
884
#ifdef CONFIG_LIBPMEM
/* pmem-override only makes sense when libpmem support is built in */
static const Property spapr_nvdimm_properties[] = {
    DEFINE_PROP_BOOL("pmem-override", SpaprNVDIMMDevice, pmem_override, false),
};
#endif
890
/* Class init: hook the spapr-specific (un)realize and properties */
static void spapr_nvdimm_class_init(ObjectClass *oc, const void *data)
{
    NVDIMMClass *nvc = NVDIMM_CLASS(oc);

    nvc->realize = spapr_nvdimm_realize;
    nvc->unrealize = spapr_nvdimm_unrealize;

#ifdef CONFIG_LIBPMEM
    device_class_set_props(DEVICE_CLASS(oc), spapr_nvdimm_properties);
#endif
}
902
/* Instance init: start with no flush requirement and empty flush lists */
static void spapr_nvdimm_init(Object *obj)
{
    SpaprNVDIMMDevice *dev = SPAPR_NVDIMM(obj);

    dev->hcall_flush_required = false;

    QLIST_INIT(&dev->pending_nvdimm_flush_states);
    QLIST_INIT(&dev->completed_nvdimm_flush_states);
}
911
912 static TypeInfo spapr_nvdimm_info = {
913 .name = TYPE_SPAPR_NVDIMM,
914 .parent = TYPE_NVDIMM,
915 .class_init = spapr_nvdimm_class_init,
916 .class_size = sizeof(SPAPRNVDIMMClass),
917 .instance_size = sizeof(SpaprNVDIMMDevice),
918 .instance_init = spapr_nvdimm_init,
919 };
920
/* Register the spapr-nvdimm QOM type */
static void spapr_nvdimm_register_types(void)
{
    type_register_static(&spapr_nvdimm_info);
}

type_init(spapr_nvdimm_register_types)
927