xref: /qemu/hw/mem/cxl_type3.c (revision 7e3798eddde9bcbf3e83ddcd236eeca4c413f34f)
1 /*
2  * CXL Type 3 (memory expander) device
3  *
4  * Copyright(C) 2020 Intel Corporation.
5  *
6  * This work is licensed under the terms of the GNU GPL, version 2. See the
7  * COPYING file in the top-level directory.
8  *
9  * SPDX-License-Identifier: GPL-2.0-only
10  */
11 
12 #include "qemu/osdep.h"
13 #include "qemu/units.h"
14 #include "qemu/error-report.h"
15 #include "qapi/qapi-commands-cxl.h"
16 #include "hw/mem/memory-device.h"
17 #include "hw/mem/pc-dimm.h"
18 #include "hw/pci/pci.h"
19 #include "hw/qdev-properties.h"
20 #include "qapi/error.h"
21 #include "qemu/log.h"
22 #include "qemu/module.h"
23 #include "qemu/pmem.h"
24 #include "qemu/range.h"
25 #include "qemu/rcu.h"
26 #include "qemu/guest-random.h"
27 #include "sysemu/hostmem.h"
28 #include "sysemu/numa.h"
29 #include "hw/cxl/cxl.h"
30 #include "hw/pci/msix.h"
31 
32 #define DWORD_BYTE 4
33 #define CXL_CAPACITY_MULTIPLIER   (256 * MiB)
34 
35 /* Default CDAT entries for a memory region */
36 enum {
37     CT3_CDAT_DSMAS,
38     CT3_CDAT_DSLBIS0,
39     CT3_CDAT_DSLBIS1,
40     CT3_CDAT_DSLBIS2,
41     CT3_CDAT_DSLBIS3,
42     CT3_CDAT_DSEMTS,
43     CT3_CDAT_NUM_ENTRIES
44 };
45 
46 static void ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table,
47                                           int dsmad_handle, uint64_t size,
48                                           bool is_pmem, bool is_dynamic,
49                                           uint64_t dpa_base)
50 {
51     CDATDsmas *dsmas;
52     CDATDslbis *dslbis0;
53     CDATDslbis *dslbis1;
54     CDATDslbis *dslbis2;
55     CDATDslbis *dslbis3;
56     CDATDsemts *dsemts;
57 
58     dsmas = g_malloc(sizeof(*dsmas));
59     *dsmas = (CDATDsmas) {
60         .header = {
61             .type = CDAT_TYPE_DSMAS,
62             .length = sizeof(*dsmas),
63         },
64         .DSMADhandle = dsmad_handle,
65         .flags = (is_pmem ? CDAT_DSMAS_FLAG_NV : 0) |
66                  (is_dynamic ? CDAT_DSMAS_FLAG_DYNAMIC_CAP : 0),
67         .DPA_base = dpa_base,
68         .DPA_length = size,
69     };
70 
71     /* For now, no memory side cache; just plausible placeholder numbers */
72     dslbis0 = g_malloc(sizeof(*dslbis0));
73     *dslbis0 = (CDATDslbis) {
74         .header = {
75             .type = CDAT_TYPE_DSLBIS,
76             .length = sizeof(*dslbis0),
77         },
78         .handle = dsmad_handle,
79         .flags = HMAT_LB_MEM_MEMORY,
80         .data_type = HMAT_LB_DATA_READ_LATENCY,
81         .entry_base_unit = 10000, /* 10ns base */
82         .entry[0] = 15, /* 150ns */
83     };
84 
85     dslbis1 = g_malloc(sizeof(*dslbis1));
86     *dslbis1 = (CDATDslbis) {
87         .header = {
88             .type = CDAT_TYPE_DSLBIS,
89             .length = sizeof(*dslbis1),
90         },
91         .handle = dsmad_handle,
92         .flags = HMAT_LB_MEM_MEMORY,
93         .data_type = HMAT_LB_DATA_WRITE_LATENCY,
94         .entry_base_unit = 10000,
95         .entry[0] = 25, /* 250ns */
96     };
97 
98     dslbis2 = g_malloc(sizeof(*dslbis2));
99     *dslbis2 = (CDATDslbis) {
100         .header = {
101             .type = CDAT_TYPE_DSLBIS,
102             .length = sizeof(*dslbis2),
103         },
104         .handle = dsmad_handle,
105         .flags = HMAT_LB_MEM_MEMORY,
106         .data_type = HMAT_LB_DATA_READ_BANDWIDTH,
107         .entry_base_unit = 1000, /* GB/s */
108         .entry[0] = 16,
109     };
110 
111     dslbis3 = g_malloc(sizeof(*dslbis3));
112     *dslbis3 = (CDATDslbis) {
113         .header = {
114             .type = CDAT_TYPE_DSLBIS,
115             .length = sizeof(*dslbis3),
116         },
117         .handle = dsmad_handle,
118         .flags = HMAT_LB_MEM_MEMORY,
119         .data_type = HMAT_LB_DATA_WRITE_BANDWIDTH,
120         .entry_base_unit = 1000, /* GB/s */
121         .entry[0] = 16,
122     };
123 
124     dsemts = g_malloc(sizeof(*dsemts));
125     *dsemts = (CDATDsemts) {
126         .header = {
127             .type = CDAT_TYPE_DSEMTS,
128             .length = sizeof(*dsemts),
129         },
130         .DSMAS_handle = dsmad_handle,
131         /*
132          * NV: Reserved - the non-volatile flag from the DSMAS is what matters
133          * V: EFI_MEMORY_SP
134          */
135         .EFI_memory_type_attr = is_pmem ? 2 : 1,
136         .DPA_offset = 0,
137         .DPA_length = size,
138     };
139 
140     /* Header always at start of structure */
141     cdat_table[CT3_CDAT_DSMAS] = (CDATSubHeader *)dsmas;
142     cdat_table[CT3_CDAT_DSLBIS0] = (CDATSubHeader *)dslbis0;
143     cdat_table[CT3_CDAT_DSLBIS1] = (CDATSubHeader *)dslbis1;
144     cdat_table[CT3_CDAT_DSLBIS2] = (CDATSubHeader *)dslbis2;
145     cdat_table[CT3_CDAT_DSLBIS3] = (CDATSubHeader *)dslbis3;
146     cdat_table[CT3_CDAT_DSEMTS] = (CDATSubHeader *)dsemts;
147 }
148 
149 static int ct3_build_cdat_table(CDATSubHeader ***cdat_table, void *priv)
150 {
151     g_autofree CDATSubHeader **table = NULL;
152     CXLType3Dev *ct3d = priv;
153     MemoryRegion *volatile_mr = NULL, *nonvolatile_mr = NULL;
154     MemoryRegion *dc_mr = NULL;
155     uint64_t vmr_size = 0, pmr_size = 0;
156     int dsmad_handle = 0;
157     int cur_ent = 0;
158     int len = 0;
159 
160     if (!ct3d->hostpmem && !ct3d->hostvmem && !ct3d->dc.num_regions) {
161         return 0;
162     }
163 
164     if (ct3d->hostvmem) {
165         volatile_mr = host_memory_backend_get_memory(ct3d->hostvmem);
166         if (!volatile_mr) {
167             return -EINVAL;
168         }
169         len += CT3_CDAT_NUM_ENTRIES;
170         vmr_size = memory_region_size(volatile_mr);
171     }
172 
173     if (ct3d->hostpmem) {
174         nonvolatile_mr = host_memory_backend_get_memory(ct3d->hostpmem);
175         if (!nonvolatile_mr) {
176             return -EINVAL;
177         }
178         len += CT3_CDAT_NUM_ENTRIES;
179         pmr_size = memory_region_size(nonvolatile_mr);
180     }
181 
182     if (ct3d->dc.num_regions) {
183         if (!ct3d->dc.host_dc) {
184             return -EINVAL;
185         }
186         dc_mr = host_memory_backend_get_memory(ct3d->dc.host_dc);
187         if (!dc_mr) {
188             return -EINVAL;
189         }
190         len += CT3_CDAT_NUM_ENTRIES * ct3d->dc.num_regions;
191     }
192 
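    /*
     * Each accounted memory region (volatile, persistent, and every DC
     * region) contributes one block of CT3_CDAT_NUM_ENTRIES sub-tables,
     * which are filled in below.
     */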
193     table = g_malloc0(len * sizeof(*table));
194 
195     /* Now fill them in */
196     if (volatile_mr) {
197         ct3_build_cdat_entries_for_mr(table, dsmad_handle++, vmr_size,
198                                       false, false, 0);
199         cur_ent = CT3_CDAT_NUM_ENTRIES;
200     }
201 
202     if (nonvolatile_mr) {
203         uint64_t base = vmr_size;
204         ct3_build_cdat_entries_for_mr(&(table[cur_ent]), dsmad_handle++,
205                                       pmr_size, true, false, base);
206         cur_ent += CT3_CDAT_NUM_ENTRIES;
207     }
208 
209     if (dc_mr) {
210         int i;
211         uint64_t region_base = vmr_size + pmr_size;
212 
213         /*
214          * We assume the dynamic capacity to be volatile for now.
215          * Non-volatile dynamic capacity will be added if needed in the
216          * future.
217          */
218         for (i = 0; i < ct3d->dc.num_regions; i++) {
219             ct3_build_cdat_entries_for_mr(&(table[cur_ent]),
220                                           dsmad_handle++,
221                                           ct3d->dc.regions[i].len,
222                                           false, true, region_base);
223             ct3d->dc.regions[i].dsmadhandle = dsmad_handle - 1;
224 
225             cur_ent += CT3_CDAT_NUM_ENTRIES;
226             region_base += ct3d->dc.regions[i].len;
227         }
228     }
229 
230     assert(len == cur_ent);
231 
232     *cdat_table = g_steal_pointer(&table);
233 
234     return len;
235 }
236 
237 static void ct3_free_cdat_table(CDATSubHeader **cdat_table, int num, void *priv)
238 {
239     int i;
240 
241     for (i = 0; i < num; i++) {
242         g_free(cdat_table[i]);
243     }
244     g_free(cdat_table);
245 }
246 
247 static bool cxl_doe_cdat_rsp(DOECap *doe_cap)
248 {
249     CDATObject *cdat = &CXL_TYPE3(doe_cap->pdev)->cxl_cstate.cdat;
250     uint16_t ent;
251     void *base;
252     uint32_t len;
253     CDATReq *req = pcie_doe_get_write_mbox_ptr(doe_cap);
254     CDATRsp rsp;
255 
256     assert(cdat->entry_len);
257 
258     /* Discard if request length mismatched */
259     if (pcie_doe_get_obj_len(req) <
260         DIV_ROUND_UP(sizeof(CDATReq), DWORD_BYTE)) {
261         return false;
262     }
263 
264     ent = req->entry_handle;
265     base = cdat->entry[ent].base;
266     len = cdat->entry[ent].length;
267 
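    /*
     * Each response carries a single CDAT entry.  The entry_handle in the
     * response tells the host which entry to request next, or
     * CXL_DOE_TAB_ENT_MAX once the final entry has been returned.
     */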
268     rsp = (CDATRsp) {
269         .header = {
270             .vendor_id = CXL_VENDOR_ID,
271             .data_obj_type = CXL_DOE_TABLE_ACCESS,
272             .reserved = 0x0,
273             .length = DIV_ROUND_UP((sizeof(rsp) + len), DWORD_BYTE),
274         },
275         .rsp_code = CXL_DOE_TAB_RSP,
276         .table_type = CXL_DOE_TAB_TYPE_CDAT,
277         .entry_handle = (ent < cdat->entry_len - 1) ?
278                         ent + 1 : CXL_DOE_TAB_ENT_MAX,
279     };
280 
281     memcpy(doe_cap->read_mbox, &rsp, sizeof(rsp));
282     memcpy(doe_cap->read_mbox + DIV_ROUND_UP(sizeof(rsp), DWORD_BYTE),
283            base, len);
284 
285     doe_cap->read_mbox_len += rsp.header.length;
286 
287     return true;
288 }
289 
290 static uint32_t ct3d_config_read(PCIDevice *pci_dev, uint32_t addr, int size)
291 {
292     CXLType3Dev *ct3d = CXL_TYPE3(pci_dev);
293     uint32_t val;
294 
295     if (pcie_doe_read_config(&ct3d->doe_cdat, addr, size, &val)) {
296         return val;
297     }
298 
299     return pci_default_read_config(pci_dev, addr, size);
300 }
301 
302 static void ct3d_config_write(PCIDevice *pci_dev, uint32_t addr, uint32_t val,
303                               int size)
304 {
305     CXLType3Dev *ct3d = CXL_TYPE3(pci_dev);
306 
307     pcie_doe_write_config(&ct3d->doe_cdat, addr, val, size);
308     pci_default_write_config(pci_dev, addr, val, size);
309     pcie_aer_write_config(pci_dev, addr, val, size);
310 }
311 
312 /*
313  * Null value of all Fs suggested by IEEE RA guidelines for use of
314  * EUI, OUI and CID
315  */
316 #define UI64_NULL ~(0ULL)
317 
318 static void build_dvsecs(CXLType3Dev *ct3d)
319 {
320     CXLComponentState *cxl_cstate = &ct3d->cxl_cstate;
321     uint8_t *dvsec;
322     uint32_t range1_size_hi, range1_size_lo,
323              range1_base_hi = 0, range1_base_lo = 0,
324              range2_size_hi = 0, range2_size_lo = 0,
325              range2_base_hi = 0, range2_base_lo = 0;
326 
327     /*
328      * Volatile memory is mapped as (0x0)
329      * Persistent memory is mapped at (volatile->size)
330      */
331     if (ct3d->hostvmem) {
332         range1_size_hi = ct3d->hostvmem->size >> 32;
333         range1_size_lo = (2 << 5) | (2 << 2) | 0x3 |
334                          (ct3d->hostvmem->size & 0xF0000000);
335         if (ct3d->hostpmem) {
336             range2_size_hi = ct3d->hostpmem->size >> 32;
337             range2_size_lo = (2 << 5) | (2 << 2) | 0x3 |
338                              (ct3d->hostpmem->size & 0xF0000000);
339         }
340     } else if (ct3d->hostpmem) {
341         range1_size_hi = ct3d->hostpmem->size >> 32;
342         range1_size_lo = (2 << 5) | (2 << 2) | 0x3 |
343                          (ct3d->hostpmem->size & 0xF0000000);
344     } else {
345         /*
346          * For a DCD with no static memory, set the memory active and memory
347          * class bits. No range is set.
348          */
349         range1_size_hi = 0;
350         range1_size_lo = (2 << 5) | (2 << 2) | 0x3;
351     }
352 
353     dvsec = (uint8_t *)&(CXLDVSECDevice){
354         .cap = 0x1e,
355         .ctrl = 0x2,
356         .status2 = 0x2,
357         .range1_size_hi = range1_size_hi,
358         .range1_size_lo = range1_size_lo,
359         .range1_base_hi = range1_base_hi,
360         .range1_base_lo = range1_base_lo,
361         .range2_size_hi = range2_size_hi,
362         .range2_size_lo = range2_size_lo,
363         .range2_base_hi = range2_base_hi,
364         .range2_base_lo = range2_base_lo,
365     };
366     cxl_component_create_dvsec(cxl_cstate, CXL2_TYPE3_DEVICE,
367                                PCIE_CXL_DEVICE_DVSEC_LENGTH,
368                                PCIE_CXL_DEVICE_DVSEC,
369                                PCIE_CXL31_DEVICE_DVSEC_REVID, dvsec);
370 
371     dvsec = (uint8_t *)&(CXLDVSECRegisterLocator){
372         .rsvd         = 0,
373         .reg0_base_lo = RBI_COMPONENT_REG | CXL_COMPONENT_REG_BAR_IDX,
374         .reg0_base_hi = 0,
375         .reg1_base_lo = RBI_CXL_DEVICE_REG | CXL_DEVICE_REG_BAR_IDX,
376         .reg1_base_hi = 0,
377     };
378     cxl_component_create_dvsec(cxl_cstate, CXL2_TYPE3_DEVICE,
379                                REG_LOC_DVSEC_LENGTH, REG_LOC_DVSEC,
380                                REG_LOC_DVSEC_REVID, dvsec);
381     dvsec = (uint8_t *)&(CXLDVSECDeviceGPF){
382         .phase2_duration = 0x603, /* 3 seconds */
383         .phase2_power = 0x33, /* 0x33 milliwatts */
384     };
385     cxl_component_create_dvsec(cxl_cstate, CXL2_TYPE3_DEVICE,
386                                GPF_DEVICE_DVSEC_LENGTH, GPF_DEVICE_DVSEC,
387                                GPF_DEVICE_DVSEC_REVID, dvsec);
388 
389     dvsec = (uint8_t *)&(CXLDVSECPortFlexBus){
390         .cap                     = 0x26, /* 68B, IO, Mem, non-MLD */
391         .ctrl                    = 0x02, /* IO always enabled */
392         .status                  = 0x26, /* same as capabilities */
393         .rcvd_mod_ts_data_phase1 = 0xef, /* Reason for this value is unclear */
394     };
395     cxl_component_create_dvsec(cxl_cstate, CXL2_TYPE3_DEVICE,
396                                PCIE_CXL3_FLEXBUS_PORT_DVSEC_LENGTH,
397                                PCIE_FLEXBUS_PORT_DVSEC,
398                                PCIE_CXL3_FLEXBUS_PORT_DVSEC_REVID, dvsec);
399 }
400 
401 static void hdm_decoder_commit(CXLType3Dev *ct3d, int which)
402 {
403     int hdm_inc = R_CXL_HDM_DECODER1_BASE_LO - R_CXL_HDM_DECODER0_BASE_LO;
404     ComponentRegisters *cregs = &ct3d->cxl_cstate.crb;
405     uint32_t *cache_mem = cregs->cache_mem_registers;
406     uint32_t ctrl;
407 
408     ctrl = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_CTRL + which * hdm_inc);
409     /* TODO: Sanity checks that the decoder is possible */
410     ctrl = FIELD_DP32(ctrl, CXL_HDM_DECODER0_CTRL, ERR, 0);
411     ctrl = FIELD_DP32(ctrl, CXL_HDM_DECODER0_CTRL, COMMITTED, 1);
412 
413     stl_le_p(cache_mem + R_CXL_HDM_DECODER0_CTRL + which * hdm_inc, ctrl);
414 }
415 
416 static void hdm_decoder_uncommit(CXLType3Dev *ct3d, int which)
417 {
418     int hdm_inc = R_CXL_HDM_DECODER1_BASE_LO - R_CXL_HDM_DECODER0_BASE_LO;
419     ComponentRegisters *cregs = &ct3d->cxl_cstate.crb;
420     uint32_t *cache_mem = cregs->cache_mem_registers;
421     uint32_t ctrl;
422 
423     ctrl = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_CTRL + which * hdm_inc);
424 
425     ctrl = FIELD_DP32(ctrl, CXL_HDM_DECODER0_CTRL, ERR, 0);
426     ctrl = FIELD_DP32(ctrl, CXL_HDM_DECODER0_CTRL, COMMITTED, 0);
427 
428     stl_le_p(cache_mem + R_CXL_HDM_DECODER0_CTRL + which * hdm_inc, ctrl);
429 }
430 
431 static int ct3d_qmp_uncor_err_to_cxl(CxlUncorErrorType qmp_err)
432 {
433     switch (qmp_err) {
434     case CXL_UNCOR_ERROR_TYPE_CACHE_DATA_PARITY:
435         return CXL_RAS_UNC_ERR_CACHE_DATA_PARITY;
436     case CXL_UNCOR_ERROR_TYPE_CACHE_ADDRESS_PARITY:
437         return CXL_RAS_UNC_ERR_CACHE_ADDRESS_PARITY;
438     case CXL_UNCOR_ERROR_TYPE_CACHE_BE_PARITY:
439         return CXL_RAS_UNC_ERR_CACHE_BE_PARITY;
440     case CXL_UNCOR_ERROR_TYPE_CACHE_DATA_ECC:
441         return CXL_RAS_UNC_ERR_CACHE_DATA_ECC;
442     case CXL_UNCOR_ERROR_TYPE_MEM_DATA_PARITY:
443         return CXL_RAS_UNC_ERR_MEM_DATA_PARITY;
444     case CXL_UNCOR_ERROR_TYPE_MEM_ADDRESS_PARITY:
445         return CXL_RAS_UNC_ERR_MEM_ADDRESS_PARITY;
446     case CXL_UNCOR_ERROR_TYPE_MEM_BE_PARITY:
447         return CXL_RAS_UNC_ERR_MEM_BE_PARITY;
448     case CXL_UNCOR_ERROR_TYPE_MEM_DATA_ECC:
449         return CXL_RAS_UNC_ERR_MEM_DATA_ECC;
450     case CXL_UNCOR_ERROR_TYPE_REINIT_THRESHOLD:
451         return CXL_RAS_UNC_ERR_REINIT_THRESHOLD;
452     case CXL_UNCOR_ERROR_TYPE_RSVD_ENCODING:
453         return CXL_RAS_UNC_ERR_RSVD_ENCODING;
454     case CXL_UNCOR_ERROR_TYPE_POISON_RECEIVED:
455         return CXL_RAS_UNC_ERR_POISON_RECEIVED;
456     case CXL_UNCOR_ERROR_TYPE_RECEIVER_OVERFLOW:
457         return CXL_RAS_UNC_ERR_RECEIVER_OVERFLOW;
458     case CXL_UNCOR_ERROR_TYPE_INTERNAL:
459         return CXL_RAS_UNC_ERR_INTERNAL;
460     case CXL_UNCOR_ERROR_TYPE_CXL_IDE_TX:
461         return CXL_RAS_UNC_ERR_CXL_IDE_TX;
462     case CXL_UNCOR_ERROR_TYPE_CXL_IDE_RX:
463         return CXL_RAS_UNC_ERR_CXL_IDE_RX;
464     default:
465         return -EINVAL;
466     }
467 }
468 
469 static int ct3d_qmp_cor_err_to_cxl(CxlCorErrorType qmp_err)
470 {
471     switch (qmp_err) {
472     case CXL_COR_ERROR_TYPE_CACHE_DATA_ECC:
473         return CXL_RAS_COR_ERR_CACHE_DATA_ECC;
474     case CXL_COR_ERROR_TYPE_MEM_DATA_ECC:
475         return CXL_RAS_COR_ERR_MEM_DATA_ECC;
476     case CXL_COR_ERROR_TYPE_CRC_THRESHOLD:
477         return CXL_RAS_COR_ERR_CRC_THRESHOLD;
478     case CXL_COR_ERROR_TYPE_RETRY_THRESHOLD:
479         return CXL_RAS_COR_ERR_RETRY_THRESHOLD;
480     case CXL_COR_ERROR_TYPE_CACHE_POISON_RECEIVED:
481         return CXL_RAS_COR_ERR_CACHE_POISON_RECEIVED;
482     case CXL_COR_ERROR_TYPE_MEM_POISON_RECEIVED:
483         return CXL_RAS_COR_ERR_MEM_POISON_RECEIVED;
484     case CXL_COR_ERROR_TYPE_PHYSICAL:
485         return CXL_RAS_COR_ERR_PHYSICAL;
486     default:
487         return -EINVAL;
488     }
489 }
490 
491 static void ct3d_reg_write(void *opaque, hwaddr offset, uint64_t value,
492                            unsigned size)
493 {
494     CXLComponentState *cxl_cstate = opaque;
495     ComponentRegisters *cregs = &cxl_cstate->crb;
496     CXLType3Dev *ct3d = container_of(cxl_cstate, CXLType3Dev, cxl_cstate);
497     uint32_t *cache_mem = cregs->cache_mem_registers;
498     bool should_commit = false;
499     bool should_uncommit = false;
500     int which_hdm = -1;
501 
502     assert(size == 4);
503     g_assert(offset < CXL2_COMPONENT_CM_REGION_SIZE);
504 
505     switch (offset) {
506     case A_CXL_HDM_DECODER0_CTRL:
507         should_commit = FIELD_EX32(value, CXL_HDM_DECODER0_CTRL, COMMIT);
508         should_uncommit = !should_commit;
509         which_hdm = 0;
510         break;
511     case A_CXL_HDM_DECODER1_CTRL:
512         should_commit = FIELD_EX32(value, CXL_HDM_DECODER0_CTRL, COMMIT);
513         should_uncommit = !should_commit;
514         which_hdm = 1;
515         break;
516     case A_CXL_HDM_DECODER2_CTRL:
517         should_commit = FIELD_EX32(value, CXL_HDM_DECODER0_CTRL, COMMIT);
518         should_uncommit = !should_commit;
519         which_hdm = 2;
520         break;
521     case A_CXL_HDM_DECODER3_CTRL:
522         should_commit = FIELD_EX32(value, CXL_HDM_DECODER0_CTRL, COMMIT);
523         should_uncommit = !should_commit;
524         which_hdm = 3;
525         break;
526     case A_CXL_RAS_UNC_ERR_STATUS:
527     {
528         uint32_t capctrl = ldl_le_p(cache_mem + R_CXL_RAS_ERR_CAP_CTRL);
529         uint32_t fe = FIELD_EX32(capctrl, CXL_RAS_ERR_CAP_CTRL,
530                                  FIRST_ERROR_POINTER);
531         CXLError *cxl_err;
532         uint32_t unc_err;
533 
534         /*
535          * If single bit written that corresponds to the first error
536          * pointer being cleared, update the status and header log.
537          */
538         if (!QTAILQ_EMPTY(&ct3d->error_list)) {
539             if ((1 << fe) ^ value) {
540                 CXLError *cxl_next;
541                 /*
542                  * Software is using the wrong flow for multiple header recording.
543                  * Follow the behavior in PCIe r6.0, assuming multiple header
544                  * support. It is an implementation-defined choice to clear all
545                  * matching records if more than one bit is set - which corresponds
546                  * most closely to the behavior of hardware that is not capable of
547                  * multiple header recording.
548                  */
549                 QTAILQ_FOREACH_SAFE(cxl_err, &ct3d->error_list, node,
550                                     cxl_next) {
551                     if ((1 << cxl_err->type) & value) {
552                         QTAILQ_REMOVE(&ct3d->error_list, cxl_err, node);
553                         g_free(cxl_err);
554                     }
555                 }
556             } else {
557                 /* Done with previous FE, so drop from list */
558                 cxl_err = QTAILQ_FIRST(&ct3d->error_list);
559                 QTAILQ_REMOVE(&ct3d->error_list, cxl_err, node);
560                 g_free(cxl_err);
561             }
562 
563             /*
564              * If there is another FE, then put that in place and update
565              * the header log
566              */
567             if (!QTAILQ_EMPTY(&ct3d->error_list)) {
568                 uint32_t *header_log = &cache_mem[R_CXL_RAS_ERR_HEADER0];
569                 int i;
570 
571                 cxl_err = QTAILQ_FIRST(&ct3d->error_list);
572                 for (i = 0; i < CXL_RAS_ERR_HEADER_NUM; i++) {
573                     stl_le_p(header_log + i, cxl_err->header[i]);
574                 }
575                 capctrl = FIELD_DP32(capctrl, CXL_RAS_ERR_CAP_CTRL,
576                                      FIRST_ERROR_POINTER, cxl_err->type);
577             } else {
578                 /*
579                  * If no more errors, then follow recommendation of PCI spec
580                  * r6.0 6.2.4.2 to set the first error pointer to a status
581                  * bit that will never be used.
582                  */
583                 capctrl = FIELD_DP32(capctrl, CXL_RAS_ERR_CAP_CTRL,
584                                      FIRST_ERROR_POINTER,
585                                      CXL_RAS_UNC_ERR_CXL_UNUSED);
586             }
587             stl_le_p((uint8_t *)cache_mem + A_CXL_RAS_ERR_CAP_CTRL, capctrl);
588         }
589         unc_err = 0;
590         QTAILQ_FOREACH(cxl_err, &ct3d->error_list, node) {
591             unc_err |= 1 << cxl_err->type;
592         }
593         stl_le_p((uint8_t *)cache_mem + offset, unc_err);
594 
595         return;
596     }
597     case A_CXL_RAS_COR_ERR_STATUS:
598     {
599         uint32_t rw1c = value;
600         uint32_t temp = ldl_le_p((uint8_t *)cache_mem + offset);
601         temp &= ~rw1c;
602         stl_le_p((uint8_t *)cache_mem + offset, temp);
603         return;
604     }
605     default:
606         break;
607     }
608 
609     stl_le_p((uint8_t *)cache_mem + offset, value);
610     if (should_commit) {
611         hdm_decoder_commit(ct3d, which_hdm);
612     } else if (should_uncommit) {
613         hdm_decoder_uncommit(ct3d, which_hdm);
614     }
615 }
616 
617 /*
618  * TODO: dc region configuration will be updated once host backend and address
619  * space support is added for DCD.
620  */
621 static bool cxl_create_dc_regions(CXLType3Dev *ct3d, Error **errp)
622 {
623     int i;
624     uint64_t region_base = 0;
625     uint64_t region_len;
626     uint64_t decode_len;
627     uint64_t blk_size = 2 * MiB;
628     CXLDCRegion *region;
629     MemoryRegion *mr;
630     uint64_t dc_size;
631 
632     mr = host_memory_backend_get_memory(ct3d->dc.host_dc);
633     dc_size = memory_region_size(mr);
634     region_len = DIV_ROUND_UP(dc_size, ct3d->dc.num_regions);
635 
636     if (dc_size % (ct3d->dc.num_regions * CXL_CAPACITY_MULTIPLIER) != 0) {
637         error_setg(errp,
638                    "backend size is not multiple of region len: 0x%" PRIx64,
639                    region_len);
640         return false;
641     }
642     if (region_len % CXL_CAPACITY_MULTIPLIER != 0) {
643         error_setg(errp, "DC region size is unaligned to 0x%" PRIx64,
644                    CXL_CAPACITY_MULTIPLIER);
645         return false;
646     }
647     decode_len = region_len;
648 
649     if (ct3d->hostvmem) {
650         mr = host_memory_backend_get_memory(ct3d->hostvmem);
651         region_base += memory_region_size(mr);
652     }
653     if (ct3d->hostpmem) {
654         mr = host_memory_backend_get_memory(ct3d->hostpmem);
655         region_base += memory_region_size(mr);
656     }
657     if (region_base % CXL_CAPACITY_MULTIPLIER != 0) {
658         error_setg(errp, "DC region base not aligned to 0x%" PRIx64,
659                    CXL_CAPACITY_MULTIPLIER);
660         return false;
661     }
662 
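    /*
     * Lay the DC regions out contiguously in DPA space, immediately after
     * any static volatile and persistent capacity accounted above.
     */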
663     for (i = 0, region = &ct3d->dc.regions[0];
664          i < ct3d->dc.num_regions;
665          i++, region++, region_base += region_len) {
666         *region = (CXLDCRegion) {
667             .base = region_base,
668             .decode_len = decode_len,
669             .len = region_len,
670             .block_size = blk_size,
671             /* dsmad_handle set when creating CDAT table entries */
672             .flags = 0,
673         };
674         ct3d->dc.total_capacity += region->len;
675         region->blk_bitmap = bitmap_new(region->len / region->block_size);
676     }
677     QTAILQ_INIT(&ct3d->dc.extents);
678     QTAILQ_INIT(&ct3d->dc.extents_pending);
679 
680     return true;
681 }
682 
683 static void cxl_destroy_dc_regions(CXLType3Dev *ct3d)
684 {
685     CXLDCExtent *ent, *ent_next;
686     CXLDCExtentGroup *group, *group_next;
687     int i;
688     CXLDCRegion *region;
689 
690     QTAILQ_FOREACH_SAFE(ent, &ct3d->dc.extents, node, ent_next) {
691         cxl_remove_extent_from_extent_list(&ct3d->dc.extents, ent);
692     }
693 
694     QTAILQ_FOREACH_SAFE(group, &ct3d->dc.extents_pending, node, group_next) {
695         QTAILQ_REMOVE(&ct3d->dc.extents_pending, group, node);
696         QTAILQ_FOREACH_SAFE(ent, &group->list, node, ent_next) {
697             cxl_remove_extent_from_extent_list(&group->list, ent);
698         }
699         g_free(group);
700     }
701 
702     for (i = 0; i < ct3d->dc.num_regions; i++) {
703         region = &ct3d->dc.regions[i];
704         g_free(region->blk_bitmap);
705     }
706 }
707 
708 static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
709 {
710     DeviceState *ds = DEVICE(ct3d);
711 
712     if (!ct3d->hostmem && !ct3d->hostvmem && !ct3d->hostpmem
713         && !ct3d->dc.num_regions) {
714         error_setg(errp, "at least one memdev property must be set");
715         return false;
716     } else if (ct3d->hostmem && ct3d->hostpmem) {
717         error_setg(errp, "[memdev] cannot be used with new "
718                          "[persistent-memdev] property");
719         return false;
720     } else if (ct3d->hostmem) {
721         /* Use of hostmem property implies pmem */
722         ct3d->hostpmem = ct3d->hostmem;
723         ct3d->hostmem = NULL;
724     }
725 
726     if (ct3d->hostpmem && !ct3d->lsa) {
727         error_setg(errp, "lsa property must be set for persistent devices");
728         return false;
729     }
730 
731     if (ct3d->hostvmem) {
732         MemoryRegion *vmr;
733         char *v_name;
734 
735         vmr = host_memory_backend_get_memory(ct3d->hostvmem);
736         if (!vmr) {
737             error_setg(errp, "volatile memdev must have backing device");
738             return false;
739         }
740         memory_region_set_nonvolatile(vmr, false);
741         memory_region_set_enabled(vmr, true);
742         host_memory_backend_set_mapped(ct3d->hostvmem, true);
743         if (ds->id) {
744             v_name = g_strdup_printf("cxl-type3-dpa-vmem-space:%s", ds->id);
745         } else {
746             v_name = g_strdup("cxl-type3-dpa-vmem-space");
747         }
748         address_space_init(&ct3d->hostvmem_as, vmr, v_name);
749         ct3d->cxl_dstate.vmem_size = memory_region_size(vmr);
750         ct3d->cxl_dstate.static_mem_size += memory_region_size(vmr);
751         g_free(v_name);
752     }
753 
754     if (ct3d->hostpmem) {
755         MemoryRegion *pmr;
756         char *p_name;
757 
758         pmr = host_memory_backend_get_memory(ct3d->hostpmem);
759         if (!pmr) {
760             error_setg(errp, "persistent memdev must have backing device");
761             return false;
762         }
763         memory_region_set_nonvolatile(pmr, true);
764         memory_region_set_enabled(pmr, true);
765         host_memory_backend_set_mapped(ct3d->hostpmem, true);
766         if (ds->id) {
767             p_name = g_strdup_printf("cxl-type3-dpa-pmem-space:%s", ds->id);
768         } else {
769             p_name = g_strdup("cxl-type3-dpa-pmem-space");
770         }
771         address_space_init(&ct3d->hostpmem_as, pmr, p_name);
772         ct3d->cxl_dstate.pmem_size = memory_region_size(pmr);
773         ct3d->cxl_dstate.static_mem_size += memory_region_size(pmr);
774         g_free(p_name);
775     }
776 
777     ct3d->dc.total_capacity = 0;
778     if (ct3d->dc.num_regions > 0) {
779         MemoryRegion *dc_mr;
780         char *dc_name;
781 
782         if (!ct3d->dc.host_dc) {
783             error_setg(errp, "dynamic capacity must have a backing device");
784             return false;
785         }
786 
787         dc_mr = host_memory_backend_get_memory(ct3d->dc.host_dc);
788         if (!dc_mr) {
789             error_setg(errp, "dynamic capacity must have a backing device");
790             return false;
791         }
792 
793         /*
794          * Set DC regions as volatile for now, non-volatile support can
795          * be added in the future if needed.
796          */
797         memory_region_set_nonvolatile(dc_mr, false);
798         memory_region_set_enabled(dc_mr, true);
799         host_memory_backend_set_mapped(ct3d->dc.host_dc, true);
800         if (ds->id) {
801             dc_name = g_strdup_printf("cxl-dcd-dpa-dc-space:%s", ds->id);
802         } else {
803             dc_name = g_strdup("cxl-dcd-dpa-dc-space");
804         }
805         address_space_init(&ct3d->dc.host_dc_as, dc_mr, dc_name);
806         g_free(dc_name);
807 
808         if (!cxl_create_dc_regions(ct3d, errp)) {
809             error_append_hint(errp, "setup DC regions failed");
810             return false;
811         }
812     }
813 
814     return true;
815 }
816 
817 static DOEProtocol doe_cdat_prot[] = {
818     { CXL_VENDOR_ID, CXL_DOE_TABLE_ACCESS, cxl_doe_cdat_rsp },
819     { }
820 };
821 
822 static void ct3_realize(PCIDevice *pci_dev, Error **errp)
823 {
824     ERRP_GUARD();
825     CXLType3Dev *ct3d = CXL_TYPE3(pci_dev);
826     CXLComponentState *cxl_cstate = &ct3d->cxl_cstate;
827     ComponentRegisters *regs = &cxl_cstate->crb;
828     MemoryRegion *mr = &regs->component_registers;
829     uint8_t *pci_conf = pci_dev->config;
830     unsigned short msix_num = 6;
831     int i, rc;
832 
833     QTAILQ_INIT(&ct3d->error_list);
834 
835     if (!cxl_setup_memory(ct3d, errp)) {
836         return;
837     }
838 
839     pci_config_set_prog_interface(pci_conf, 0x10);
840 
841     pcie_endpoint_cap_init(pci_dev, 0x80);
842     if (ct3d->sn != UI64_NULL) {
843         pcie_dev_ser_num_init(pci_dev, 0x100, ct3d->sn);
844         cxl_cstate->dvsec_offset = 0x100 + 0x0c;
845     } else {
846         cxl_cstate->dvsec_offset = 0x100;
847     }
848 
849     ct3d->cxl_cstate.pdev = pci_dev;
850     build_dvsecs(ct3d);
851 
852     regs->special_ops = g_new0(MemoryRegionOps, 1);
853     regs->special_ops->write = ct3d_reg_write;
854 
855     cxl_component_register_block_init(OBJECT(pci_dev), cxl_cstate,
856                                       TYPE_CXL_TYPE3);
857 
858     pci_register_bar(
859         pci_dev, CXL_COMPONENT_REG_BAR_IDX,
860         PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64, mr);
861 
862     cxl_device_register_block_init(OBJECT(pci_dev), &ct3d->cxl_dstate,
863                                    &ct3d->cci);
864     pci_register_bar(pci_dev, CXL_DEVICE_REG_BAR_IDX,
865                      PCI_BASE_ADDRESS_SPACE_MEMORY |
866                          PCI_BASE_ADDRESS_MEM_TYPE_64,
867                      &ct3d->cxl_dstate.device_registers);
868 
869     /* MSI(-X) Initialization */
870     rc = msix_init_exclusive_bar(pci_dev, msix_num, 4, NULL);
871     if (rc) {
872         goto err_address_space_free;
873     }
874     for (i = 0; i < msix_num; i++) {
875         msix_vector_use(pci_dev, i);
876     }
877 
878     /* DOE Initialization */
879     pcie_doe_init(pci_dev, &ct3d->doe_cdat, 0x190, doe_cdat_prot, true, 0);
880 
881     cxl_cstate->cdat.build_cdat_table = ct3_build_cdat_table;
882     cxl_cstate->cdat.free_cdat_table = ct3_free_cdat_table;
883     cxl_cstate->cdat.private = ct3d;
884     if (!cxl_doe_cdat_init(cxl_cstate, errp)) {
885         goto err_free_special_ops;
886     }
887 
888     pcie_cap_deverr_init(pci_dev);
889     /* Leave a bit of room for expansion */
890     rc = pcie_aer_init(pci_dev, PCI_ERR_VER, 0x200, PCI_ERR_SIZEOF, NULL);
891     if (rc) {
892         goto err_release_cdat;
893     }
894     cxl_event_init(&ct3d->cxl_dstate, 2);
895 
896     return;
897 
898 err_release_cdat:
899     cxl_doe_cdat_release(cxl_cstate);
900 err_free_special_ops:
901     g_free(regs->special_ops);
902 err_address_space_free:
903     if (ct3d->dc.host_dc) {
904         cxl_destroy_dc_regions(ct3d);
905         address_space_destroy(&ct3d->dc.host_dc_as);
906     }
907     if (ct3d->hostpmem) {
908         address_space_destroy(&ct3d->hostpmem_as);
909     }
910     if (ct3d->hostvmem) {
911         address_space_destroy(&ct3d->hostvmem_as);
912     }
913     return;
914 }
915 
916 static void ct3_exit(PCIDevice *pci_dev)
917 {
918     CXLType3Dev *ct3d = CXL_TYPE3(pci_dev);
919     CXLComponentState *cxl_cstate = &ct3d->cxl_cstate;
920     ComponentRegisters *regs = &cxl_cstate->crb;
921 
922     pcie_aer_exit(pci_dev);
923     cxl_doe_cdat_release(cxl_cstate);
924     g_free(regs->special_ops);
925     if (ct3d->dc.host_dc) {
926         cxl_destroy_dc_regions(ct3d);
927         address_space_destroy(&ct3d->dc.host_dc_as);
928     }
929     if (ct3d->hostpmem) {
930         address_space_destroy(&ct3d->hostpmem_as);
931     }
932     if (ct3d->hostvmem) {
933         address_space_destroy(&ct3d->hostvmem_as);
934     }
935 }
936 
937 /*
938  * Mark the DPA range [dpa, dpa + len - 1] to be backed and accessible. This
939  * happens when a DC extent is added and accepted by the host.
940  */
941 void ct3_set_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
942                                  uint64_t len)
943 {
944     CXLDCRegion *region;
945 
946     region = cxl_find_dc_region(ct3d, dpa, len);
947     if (!region) {
948         return;
949     }
950 
951     bitmap_set(region->blk_bitmap, (dpa - region->base) / region->block_size,
952                len / region->block_size);
953 }
954 
955 /*
956  * Check whether the DPA range [dpa, dpa + len - 1] is backed with DC extents.
957  * Used when validating read/write to dc regions
958  */
959 bool ct3_test_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
960                                   uint64_t len)
961 {
962     CXLDCRegion *region;
963     uint64_t nbits;
964     long nr;
965 
966     region = cxl_find_dc_region(ct3d, dpa, len);
967     if (!region) {
968         return false;
969     }
970 
971     nr = (dpa - region->base) / region->block_size;
972     nbits = DIV_ROUND_UP(len, region->block_size);
973     /*
974      * If all bits within [dpa, dpa + len) are set, the DPA range is fully
975      * backed with DC extents, so return true; otherwise return false.
976      */
977     return find_next_zero_bit(region->blk_bitmap, nr + nbits, nr) == nr + nbits;
978 }
979 
980 /*
981  * Mark the DPA range [dpa, dpa + len - 1] to be unbacked and inaccessible.
982  * This happens when a dc extent is released by the host.
983  */
984 void ct3_clear_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
985                                    uint64_t len)
986 {
987     CXLDCRegion *region;
988     uint64_t nbits;
989     long nr;
990 
991     region = cxl_find_dc_region(ct3d, dpa, len);
992     if (!region) {
993         return;
994     }
995 
996     nr = (dpa - region->base) / region->block_size;
997     nbits = len / region->block_size;
998     bitmap_clear(region->blk_bitmap, nr, nbits);
999 }
1000 
1001 static bool cxl_type3_dpa(CXLType3Dev *ct3d, hwaddr host_addr, uint64_t *dpa)
1002 {
1003     int hdm_inc = R_CXL_HDM_DECODER1_BASE_LO - R_CXL_HDM_DECODER0_BASE_LO;
1004     uint32_t *cache_mem = ct3d->cxl_cstate.crb.cache_mem_registers;
1005     unsigned int hdm_count;
1006     uint32_t cap;
1007     uint64_t dpa_base = 0;
1008     int i;
1009 
1010     cap = ldl_le_p(cache_mem + R_CXL_HDM_DECODER_CAPABILITY);
1011     hdm_count = cxl_decoder_count_dec(FIELD_EX32(cap,
1012                                                  CXL_HDM_DECODER_CAPABILITY,
1013                                                  DECODER_COUNT));
1014 
1015     for (i = 0; i < hdm_count; i++) {
1016         uint64_t decoder_base, decoder_size, hpa_offset, skip;
1017         uint32_t hdm_ctrl, low, high;
1018         int ig, iw;
1019 
1020         low = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_BASE_LO + i * hdm_inc);
1021         high = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_BASE_HI + i * hdm_inc);
1022         decoder_base = ((uint64_t)high << 32) | (low & 0xf0000000);
1023 
1024         low = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_SIZE_LO + i * hdm_inc);
1025         high = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_SIZE_HI + i * hdm_inc);
1026         decoder_size = ((uint64_t)high << 32) | (low & 0xf0000000);
1027 
1028         low = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_DPA_SKIP_LO +
1029                        i * hdm_inc);
1030         high = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_DPA_SKIP_HI +
1031                         i * hdm_inc);
1032         skip = ((uint64_t)high << 32) | (low & 0xf0000000);
1033         dpa_base += skip;
1034 
1035         hpa_offset = (uint64_t)host_addr - decoder_base;
1036 
1037         hdm_ctrl = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_CTRL + i * hdm_inc);
1038         iw = FIELD_EX32(hdm_ctrl, CXL_HDM_DECODER0_CTRL, IW);
1039         ig = FIELD_EX32(hdm_ctrl, CXL_HDM_DECODER0_CTRL, IG);
1040         if (!FIELD_EX32(hdm_ctrl, CXL_HDM_DECODER0_CTRL, COMMITTED)) {
1041             return false;
1042         }
1043         if (((uint64_t)host_addr < decoder_base) ||
1044             (hpa_offset >= decoder_size)) {
1045             int decoded_iw = cxl_interleave_ways_dec(iw, &error_fatal);
1046 
1047             if (decoded_iw == 0) {
1048                 return false;
1049             }
1050 
1051             dpa_base += decoder_size / decoded_iw;
1052             continue;
1053         }
1054 
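        /*
         * Reconstruct the DPA from the HPA offset: bits below (8 + IG)
         * address bytes within an interleave granule and pass through
         * unchanged, while bits at and above (8 + IG + IW) are shifted
         * down by IW, discarding the way-selection bits in between.
         */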
1055         *dpa = dpa_base +
1056             ((MAKE_64BIT_MASK(0, 8 + ig) & hpa_offset) |
1057              ((MAKE_64BIT_MASK(8 + ig + iw, 64 - 8 - ig - iw) & hpa_offset)
1058               >> iw));
1059 
1060         return true;
1061     }
1062     return false;
1063 }
1064 
1065 static int cxl_type3_hpa_to_as_and_dpa(CXLType3Dev *ct3d,
1066                                        hwaddr host_addr,
1067                                        unsigned int size,
1068                                        AddressSpace **as,
1069                                        uint64_t *dpa_offset)
1070 {
1071     MemoryRegion *vmr = NULL, *pmr = NULL, *dc_mr = NULL;
1072     uint64_t vmr_size = 0, pmr_size = 0, dc_size = 0;
1073 
1074     if (ct3d->hostvmem) {
1075         vmr = host_memory_backend_get_memory(ct3d->hostvmem);
1076         vmr_size = memory_region_size(vmr);
1077     }
1078     if (ct3d->hostpmem) {
1079         pmr = host_memory_backend_get_memory(ct3d->hostpmem);
1080         pmr_size = memory_region_size(pmr);
1081     }
1082     if (ct3d->dc.host_dc) {
1083         dc_mr = host_memory_backend_get_memory(ct3d->dc.host_dc);
1084         dc_size = memory_region_size(dc_mr);
1085     }
1086 
1087     if (!vmr && !pmr && !dc_mr) {
1088         return -ENODEV;
1089     }
1090 
1091     if (!cxl_type3_dpa(ct3d, host_addr, dpa_offset)) {
1092         return -EINVAL;
1093     }
1094 
1095     if (*dpa_offset >= vmr_size + pmr_size + dc_size) {
1096         return -EINVAL;
1097     }
1098 
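    /*
     * DPA space is laid out as volatile memory first, then persistent
     * memory, then dynamic capacity; pick the address space accordingly.
     */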
1099     if (*dpa_offset < vmr_size) {
1100         *as = &ct3d->hostvmem_as;
1101     } else if (*dpa_offset < vmr_size + pmr_size) {
1102         *as = &ct3d->hostpmem_as;
1103         *dpa_offset -= vmr_size;
1104     } else {
1105         if (!ct3_test_region_block_backed(ct3d, *dpa_offset, size)) {
1106             return -ENODEV;
1107         }
1108 
1109         *as = &ct3d->dc.host_dc_as;
1110         *dpa_offset -= (vmr_size + pmr_size);
1111     }
1112 
1113     return 0;
1114 }
1115 
1116 MemTxResult cxl_type3_read(PCIDevice *d, hwaddr host_addr, uint64_t *data,
1117                            unsigned size, MemTxAttrs attrs)
1118 {
1119     CXLType3Dev *ct3d = CXL_TYPE3(d);
1120     uint64_t dpa_offset = 0;
1121     AddressSpace *as = NULL;
1122     int res;
1123 
1124     res = cxl_type3_hpa_to_as_and_dpa(ct3d, host_addr, size,
1125                                       &as, &dpa_offset);
1126     if (res) {
1127         return MEMTX_ERROR;
1128     }
1129 
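    /* While a sanitize command is running, reads return random data */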
1130     if (sanitize_running(&ct3d->cci)) {
1131         qemu_guest_getrandom_nofail(data, size);
1132         return MEMTX_OK;
1133     }
1134 
1135     return address_space_read(as, dpa_offset, attrs, data, size);
1136 }
1137 
1138 MemTxResult cxl_type3_write(PCIDevice *d, hwaddr host_addr, uint64_t data,
1139                             unsigned size, MemTxAttrs attrs)
1140 {
1141     CXLType3Dev *ct3d = CXL_TYPE3(d);
1142     uint64_t dpa_offset = 0;
1143     AddressSpace *as = NULL;
1144     int res;
1145 
1146     res = cxl_type3_hpa_to_as_and_dpa(ct3d, host_addr, size,
1147                                       &as, &dpa_offset);
1148     if (res) {
1149         return MEMTX_ERROR;
1150     }
1151 
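    /* While a sanitize command is running, writes are silently dropped */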
1152     if (sanitize_running(&ct3d->cci)) {
1153         return MEMTX_OK;
1154     }
1155 
1156     return address_space_write(as, dpa_offset, attrs, &data, size);
1157 }
1158 
1159 static void ct3d_reset(DeviceState *dev)
1160 {
1161     CXLType3Dev *ct3d = CXL_TYPE3(dev);
1162     uint32_t *reg_state = ct3d->cxl_cstate.crb.cache_mem_registers;
1163     uint32_t *write_msk = ct3d->cxl_cstate.crb.cache_mem_regs_write_mask;
1164 
1165     cxl_component_register_init_common(reg_state, write_msk, CXL2_TYPE3_DEVICE);
1166     cxl_device_register_init_t3(ct3d);
1167 
1168     /*
1169      * Bring up an endpoint to target with MCTP over VDM.
1170      * This device is emulating an MLD with a single LD for now.
1171      */
1172     cxl_initialize_t3_fm_owned_ld_mctpcci(&ct3d->vdm_fm_owned_ld_mctp_cci,
1173                                           DEVICE(ct3d), DEVICE(ct3d),
1174                                           512); /* Arbitrary max payload size */
1175     cxl_initialize_t3_ld_cci(&ct3d->ld0_cci, DEVICE(ct3d), DEVICE(ct3d),
1176                              512); /* Arbitrary max payload size */
1177 
1178 }
1179 
1180 static Property ct3_props[] = {
1181     DEFINE_PROP_LINK("memdev", CXLType3Dev, hostmem, TYPE_MEMORY_BACKEND,
1182                      HostMemoryBackend *), /* for backward compatibility */
1183     DEFINE_PROP_LINK("persistent-memdev", CXLType3Dev, hostpmem,
1184                      TYPE_MEMORY_BACKEND, HostMemoryBackend *),
1185     DEFINE_PROP_LINK("volatile-memdev", CXLType3Dev, hostvmem,
1186                      TYPE_MEMORY_BACKEND, HostMemoryBackend *),
1187     DEFINE_PROP_LINK("lsa", CXLType3Dev, lsa, TYPE_MEMORY_BACKEND,
1188                      HostMemoryBackend *),
1189     DEFINE_PROP_UINT64("sn", CXLType3Dev, sn, UI64_NULL),
1190     DEFINE_PROP_STRING("cdat", CXLType3Dev, cxl_cstate.cdat.filename),
1191     DEFINE_PROP_UINT8("num-dc-regions", CXLType3Dev, dc.num_regions, 0),
1192     DEFINE_PROP_LINK("volatile-dc-memdev", CXLType3Dev, dc.host_dc,
1193                      TYPE_MEMORY_BACKEND, HostMemoryBackend *),
1194     DEFINE_PROP_END_OF_LIST(),
1195 };
1196 
1197 static uint64_t get_lsa_size(CXLType3Dev *ct3d)
1198 {
1199     MemoryRegion *mr;
1200 
1201     if (!ct3d->lsa) {
1202         return 0;
1203     }
1204 
1205     mr = host_memory_backend_get_memory(ct3d->lsa);
1206     return memory_region_size(mr);
1207 }
1208 
1209 static void validate_lsa_access(MemoryRegion *mr, uint64_t size,
1210                                 uint64_t offset)
1211 {
1212     assert(offset + size <= memory_region_size(mr));
1213     assert(offset + size > offset);
1214 }
1215 
1216 static uint64_t get_lsa(CXLType3Dev *ct3d, void *buf, uint64_t size,
1217                     uint64_t offset)
1218 {
1219     MemoryRegion *mr;
1220     void *lsa;
1221 
1222     if (!ct3d->lsa) {
1223         return 0;
1224     }
1225 
1226     mr = host_memory_backend_get_memory(ct3d->lsa);
1227     validate_lsa_access(mr, size, offset);
1228 
1229     lsa = memory_region_get_ram_ptr(mr) + offset;
1230     memcpy(buf, lsa, size);
1231 
1232     return size;
1233 }
1234 
1235 static void set_lsa(CXLType3Dev *ct3d, const void *buf, uint64_t size,
1236                     uint64_t offset)
1237 {
1238     MemoryRegion *mr;
1239     void *lsa;
1240 
1241     if (!ct3d->lsa) {
1242         return;
1243     }
1244 
1245     mr = host_memory_backend_get_memory(ct3d->lsa);
1246     validate_lsa_access(mr, size, offset);
1247 
1248     lsa = memory_region_get_ram_ptr(mr) + offset;
1249     memcpy(lsa, buf, size);
1250     memory_region_set_dirty(mr, offset, size);
1251 
1252     /*
1253      * Just like the PMEM, if the guest is not allowed to exit gracefully, label
1254      * updates will get lost.
1255      */
1256 }
1257 
1258 static bool set_cacheline(CXLType3Dev *ct3d, uint64_t dpa_offset, uint8_t *data)
1259 {
1260     MemoryRegion *vmr = NULL, *pmr = NULL, *dc_mr = NULL;
1261     AddressSpace *as;
1262     uint64_t vmr_size = 0, pmr_size = 0, dc_size = 0;
1263 
1264     if (ct3d->hostvmem) {
1265         vmr = host_memory_backend_get_memory(ct3d->hostvmem);
1266         vmr_size = memory_region_size(vmr);
1267     }
1268     if (ct3d->hostpmem) {
1269         pmr = host_memory_backend_get_memory(ct3d->hostpmem);
1270         pmr_size = memory_region_size(pmr);
1271     }
1272     if (ct3d->dc.host_dc) {
1273         dc_mr = host_memory_backend_get_memory(ct3d->dc.host_dc);
1274         dc_size = memory_region_size(dc_mr);
1275     }
1276 
1277     if (!vmr && !pmr && !dc_mr) {
1278         return false;
1279     }
1280 
1281     if (dpa_offset + CXL_CACHE_LINE_SIZE > vmr_size + pmr_size + dc_size) {
1282         return false;
1283     }
1284 
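    /*
     * Same DPA layout as in cxl_type3_hpa_to_as_and_dpa(): volatile,
     * then persistent, then dynamic capacity.
     */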
1285     if (dpa_offset < vmr_size) {
1286         as = &ct3d->hostvmem_as;
1287     } else if (dpa_offset < vmr_size + pmr_size) {
1288         as = &ct3d->hostpmem_as;
1289         dpa_offset -= vmr_size;
1290     } else {
1291         as = &ct3d->dc.host_dc_as;
1292         dpa_offset -= (vmr_size + pmr_size);
1293     }
1294 
1295     address_space_write(as, dpa_offset, MEMTXATTRS_UNSPECIFIED, data,
1296                         CXL_CACHE_LINE_SIZE);
1297     return true;
1298 }
1299 
1300 void cxl_set_poison_list_overflowed(CXLType3Dev *ct3d)
1301 {
1302     ct3d->poison_list_overflowed = true;
1303     ct3d->poison_list_overflow_ts =
1304         cxl_device_get_timestamp(&ct3d->cxl_dstate);
1305 }
1306 
1307 void qmp_cxl_inject_poison(const char *path, uint64_t start, uint64_t length,
1308                            Error **errp)
1309 {
1310     Object *obj = object_resolve_path(path, NULL);
1311     CXLType3Dev *ct3d;
1312     CXLPoison *p;
1313 
1314     if (length % 64) {
1315         error_setg(errp, "Poison injection must be in multiples of 64 bytes");
1316         return;
1317     }
1318     if (start % 64) {
1319         error_setg(errp, "Poison start address must be 64 byte aligned");
1320         return;
1321     }
1322     if (!obj) {
1323         error_setg(errp, "Unable to resolve path");
1324         return;
1325     }
1326     if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) {
1327         error_setg(errp, "Path does not point to a CXL type 3 device");
1328         return;
1329     }
1330 
1331     ct3d = CXL_TYPE3(obj);
1332 
1333     QLIST_FOREACH(p, &ct3d->poison_list, node) {
1334         if (((start >= p->start) && (start < p->start + p->length)) ||
1335             ((start + length > p->start) &&
1336              (start + length <= p->start + p->length))) {
1337             error_setg(errp,
1338                        "Overlap with existing poisoned region not supported");
1339             return;
1340         }
1341     }
1342 
1343     if (ct3d->poison_list_cnt == CXL_POISON_LIST_LIMIT) {
1344         cxl_set_poison_list_overflowed(ct3d);
1345         return;
1346     }
1347 
1348     p = g_new0(CXLPoison, 1);
1349     p->length = length;
1350     p->start = start;
1351     /* Distinct from poison injected via the mailbox */
1352     p->type = CXL_POISON_TYPE_INTERNAL;
1353 
1354     QLIST_INSERT_HEAD(&ct3d->poison_list, p, node);
1355     ct3d->poison_list_cnt++;
1356 }
1357 
1358 /* For uncorrectable errors, include support for multiple header recording */
1359 void qmp_cxl_inject_uncorrectable_errors(const char *path,
1360                                          CXLUncorErrorRecordList *errors,
1361                                          Error **errp)
1362 {
1363     Object *obj = object_resolve_path(path, NULL);
1364     static PCIEAERErr err = {};
1365     CXLType3Dev *ct3d;
1366     CXLError *cxl_err;
1367     uint32_t *reg_state;
1368     uint32_t unc_err;
1369     bool first;
1370 
1371     if (!obj) {
1372         error_setg(errp, "Unable to resolve path");
1373         return;
1374     }
1375 
1376     if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) {
1377         error_setg(errp, "Path does not point to a CXL type 3 device");
1378         return;
1379     }
1380 
1381     err.status = PCI_ERR_UNC_INTN;
1382     err.source_id = pci_requester_id(PCI_DEVICE(obj));
1383     err.flags = 0;
1384 
1385     ct3d = CXL_TYPE3(obj);
1386 
1387     first = QTAILQ_EMPTY(&ct3d->error_list);
1388     reg_state = ct3d->cxl_cstate.crb.cache_mem_registers;
1389     while (errors) {
1390         uint32List *header = errors->value->header;
1391         uint8_t header_count = 0;
1392         int cxl_err_code;
1393 
1394         cxl_err_code = ct3d_qmp_uncor_err_to_cxl(errors->value->type);
1395         if (cxl_err_code < 0) {
1396             error_setg(errp, "Unknown error code");
1397             return;
1398         }
1399 
1400         /* If the error is masked, nothing to do here */
1401         if (!((1 << cxl_err_code) &
1402               ~ldl_le_p(reg_state + R_CXL_RAS_UNC_ERR_MASK))) {
1403             errors = errors->next;
1404             continue;
1405         }
1406 
1407         cxl_err = g_malloc0(sizeof(*cxl_err));
1408 
1409         cxl_err->type = cxl_err_code;
1410         while (header && header_count < 32) {
1411             cxl_err->header[header_count++] = header->value;
1412             header = header->next;
1413         }
1414         if (header) { /* more than 32 header DWORDs were supplied */
1415             error_setg(errp, "Header must be 32 DWORD or less");
1416             return;
1417         }
1418         QTAILQ_INSERT_TAIL(&ct3d->error_list, cxl_err, node);
1419 
1420         errors = errors->next;
1421     }
1422 
1423     if (first && !QTAILQ_EMPTY(&ct3d->error_list)) {
1424         uint32_t *cache_mem = ct3d->cxl_cstate.crb.cache_mem_registers;
1425         uint32_t capctrl = ldl_le_p(cache_mem + R_CXL_RAS_ERR_CAP_CTRL);
1426         uint32_t *header_log = &cache_mem[R_CXL_RAS_ERR_HEADER0];
1427         int i;
1428 
1429         cxl_err = QTAILQ_FIRST(&ct3d->error_list);
1430         for (i = 0; i < CXL_RAS_ERR_HEADER_NUM; i++) {
1431             stl_le_p(header_log + i, cxl_err->header[i]);
1432         }
1433 
1434         capctrl = FIELD_DP32(capctrl, CXL_RAS_ERR_CAP_CTRL,
1435                              FIRST_ERROR_POINTER, cxl_err->type);
1436         stl_le_p(cache_mem + R_CXL_RAS_ERR_CAP_CTRL, capctrl);
1437     }
1438 
1439     unc_err = 0;
1440     QTAILQ_FOREACH(cxl_err, &ct3d->error_list, node) {
1441         unc_err |= (1 << cxl_err->type);
1442     }
1443     if (!unc_err) {
1444         return;
1445     }
1446 
1447     stl_le_p(reg_state + R_CXL_RAS_UNC_ERR_STATUS, unc_err);
1448     pcie_aer_inject_error(PCI_DEVICE(obj), &err);
1449 
1450     return;
1451 }
1452 
1453 void qmp_cxl_inject_correctable_error(const char *path, CxlCorErrorType type,
1454                                       Error **errp)
1455 {
1456     static PCIEAERErr err = {};
1457     Object *obj = object_resolve_path(path, NULL);
1458     CXLType3Dev *ct3d;
1459     uint32_t *reg_state;
1460     uint32_t cor_err;
1461     int cxl_err_type;
1462 
1463     if (!obj) {
1464         error_setg(errp, "Unable to resolve path");
1465         return;
1466     }
1467     if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) {
1468         error_setg(errp, "Path does not point to a CXL type 3 device");
1469         return;
1470     }
1471 
1472     err.status = PCI_ERR_COR_INTERNAL;
1473     err.source_id = pci_requester_id(PCI_DEVICE(obj));
1474     err.flags = PCIE_AER_ERR_IS_CORRECTABLE;
1475 
1476     ct3d = CXL_TYPE3(obj);
1477     reg_state = ct3d->cxl_cstate.crb.cache_mem_registers;
1478     cor_err = ldl_le_p(reg_state + R_CXL_RAS_COR_ERR_STATUS);
1479 
1480     cxl_err_type = ct3d_qmp_cor_err_to_cxl(type);
1481     if (cxl_err_type < 0) {
1482         error_setg(errp, "Invalid COR error");
1483         return;
1484     }
1485     /* If the error is masked, nothing to do here */
1486     if (!((1 << cxl_err_type) &
1487           ~ldl_le_p(reg_state + R_CXL_RAS_COR_ERR_MASK))) {
1488         return;
1489     }
1490 
1491     cor_err |= (1 << cxl_err_type);
1492     stl_le_p(reg_state + R_CXL_RAS_COR_ERR_STATUS, cor_err);
1493 
1494     pcie_aer_inject_error(PCI_DEVICE(obj), &err);
1495 }
1496 
1497 static void cxl_assign_event_header(CXLEventRecordHdr *hdr,
1498                                     const QemuUUID *uuid, uint32_t flags,
1499                                     uint8_t length, uint64_t timestamp)
1500 {
1501     st24_le_p(&hdr->flags, flags);
1502     hdr->length = length;
1503     memcpy(&hdr->id, uuid, sizeof(hdr->id));
1504     stq_le_p(&hdr->timestamp, timestamp);
1505 }
1506 
1507 static const QemuUUID gen_media_uuid = {
1508     .data = UUID(0xfbcd0a77, 0xc260, 0x417f,
1509                  0x85, 0xa9, 0x08, 0x8b, 0x16, 0x21, 0xeb, 0xa6),
1510 };
1511 
1512 static const QemuUUID dram_uuid = {
1513     .data = UUID(0x601dcbb3, 0x9c06, 0x4eab, 0xb8, 0xaf,
1514                  0x4e, 0x9b, 0xfb, 0x5c, 0x96, 0x24),
1515 };
1516 
1517 static const QemuUUID memory_module_uuid = {
1518     .data = UUID(0xfe927475, 0xdd59, 0x4339, 0xa5, 0x86,
1519                  0x79, 0xba, 0xb1, 0x13, 0xb7, 0x74),
1520 };
1521 
1522 #define CXL_GMER_VALID_CHANNEL                          BIT(0)
1523 #define CXL_GMER_VALID_RANK                             BIT(1)
1524 #define CXL_GMER_VALID_DEVICE                           BIT(2)
1525 #define CXL_GMER_VALID_COMPONENT                        BIT(3)
1526 
1527 static int ct3d_qmp_cxl_event_log_enc(CxlEventLog log)
1528 {
1529     switch (log) {
1530     case CXL_EVENT_LOG_INFORMATIONAL:
1531         return CXL_EVENT_TYPE_INFO;
1532     case CXL_EVENT_LOG_WARNING:
1533         return CXL_EVENT_TYPE_WARN;
1534     case CXL_EVENT_LOG_FAILURE:
1535         return CXL_EVENT_TYPE_FAIL;
1536     case CXL_EVENT_LOG_FATAL:
1537         return CXL_EVENT_TYPE_FATAL;
1538     default:
1539         return -EINVAL;
1540     }
1541 }
1542 /* Component ID is device specific.  Define this as a string. */
1543 void qmp_cxl_inject_general_media_event(const char *path, CxlEventLog log,
1544                                         uint8_t flags, uint64_t dpa,
1545                                         uint8_t descriptor, uint8_t type,
1546                                         uint8_t transaction_type,
1547                                         bool has_channel, uint8_t channel,
1548                                         bool has_rank, uint8_t rank,
1549                                         bool has_device, uint32_t device,
1550                                         const char *component_id,
1551                                         Error **errp)
1552 {
1553     Object *obj = object_resolve_path(path, NULL);
1554     CXLEventGenMedia gem;
1555     CXLEventRecordHdr *hdr = &gem.hdr;
1556     CXLDeviceState *cxlds;
1557     CXLType3Dev *ct3d;
1558     uint16_t valid_flags = 0;
1559     uint8_t enc_log;
1560     int rc;
1561 
1562     if (!obj) {
1563         error_setg(errp, "Unable to resolve path");
1564         return;
1565     }
1566     if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) {
1567         error_setg(errp, "Path does not point to a CXL type 3 device");
1568         return;
1569     }
1570     ct3d = CXL_TYPE3(obj);
1571     cxlds = &ct3d->cxl_dstate;
1572 
1573     rc = ct3d_qmp_cxl_event_log_enc(log);
1574     if (rc < 0) {
1575         error_setg(errp, "Unhandled error log type");
1576         return;
1577     }
1578     enc_log = rc;
1579 
1580     memset(&gem, 0, sizeof(gem));
1581     cxl_assign_event_header(hdr, &gen_media_uuid, flags, sizeof(gem),
1582                             cxl_device_get_timestamp(&ct3d->cxl_dstate));
1583 
1584     stq_le_p(&gem.phys_addr, dpa);
1585     gem.descriptor = descriptor;
1586     gem.type = type;
1587     gem.transaction_type = transaction_type;
1588 
1589     if (has_channel) {
1590         gem.channel = channel;
1591         valid_flags |= CXL_GMER_VALID_CHANNEL;
1592     }
1593 
1594     if (has_rank) {
1595         gem.rank = rank;
1596         valid_flags |= CXL_GMER_VALID_RANK;
1597     }
1598 
1599     if (has_device) {
1600         st24_le_p(gem.device, device);
1601         valid_flags |= CXL_GMER_VALID_DEVICE;
1602     }
1603 
1604     if (component_id) {
1605         strncpy((char *)gem.component_id, component_id,
1606                 sizeof(gem.component_id) - 1);
1607         valid_flags |= CXL_GMER_VALID_COMPONENT;
1608     }
1609 
1610     stw_le_p(&gem.validity_flags, valid_flags);
1611 
1612     if (cxl_event_insert(cxlds, enc_log, (CXLEventRecordRaw *)&gem)) {
1613         cxl_event_irq_assert(ct3d);
1614     }
1615 }
1616 
1617 #define CXL_DRAM_VALID_CHANNEL                          BIT(0)
1618 #define CXL_DRAM_VALID_RANK                             BIT(1)
1619 #define CXL_DRAM_VALID_NIBBLE_MASK                      BIT(2)
1620 #define CXL_DRAM_VALID_BANK_GROUP                       BIT(3)
1621 #define CXL_DRAM_VALID_BANK                             BIT(4)
1622 #define CXL_DRAM_VALID_ROW                              BIT(5)
1623 #define CXL_DRAM_VALID_COLUMN                           BIT(6)
1624 #define CXL_DRAM_VALID_CORRECTION_MASK                  BIT(7)
1625 
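/*
 * Illustrative QMP usage (a sketch: command and argument names are assumed to
 * follow the definitions in qapi/cxl.json, not shown here; the device path and
 * values are examples only):
 *
 * { "execute": "cxl-inject-dram-event",
 *   "arguments": { "path": "/machine/peripheral/cxl-pmem0",
 *                  "log": "informational", "flags": 1, "dpa": 4096,
 *                  "descriptor": 3, "type": 3, "transaction-type": 192,
 *                  "bank": 2, "row": 7, "column": 8 } }
 */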
1626 void qmp_cxl_inject_dram_event(const char *path, CxlEventLog log, uint8_t flags,
1627                                uint64_t dpa, uint8_t descriptor,
1628                                uint8_t type, uint8_t transaction_type,
1629                                bool has_channel, uint8_t channel,
1630                                bool has_rank, uint8_t rank,
1631                                bool has_nibble_mask, uint32_t nibble_mask,
1632                                bool has_bank_group, uint8_t bank_group,
1633                                bool has_bank, uint8_t bank,
1634                                bool has_row, uint32_t row,
1635                                bool has_column, uint16_t column,
1636                                bool has_correction_mask,
1637                                uint64List *correction_mask,
1638                                Error **errp)
1639 {
1640     Object *obj = object_resolve_path(path, NULL);
1641     CXLEventDram dram;
1642     CXLEventRecordHdr *hdr = &dram.hdr;
1643     CXLDeviceState *cxlds;
1644     CXLType3Dev *ct3d;
1645     uint16_t valid_flags = 0;
1646     uint8_t enc_log;
1647     int rc;
1648 
1649     if (!obj) {
1650         error_setg(errp, "Unable to resolve path");
1651         return;
1652     }
1653     if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) {
1654         error_setg(errp, "Path does not point to a CXL type 3 device");
1655         return;
1656     }
1657     ct3d = CXL_TYPE3(obj);
1658     cxlds = &ct3d->cxl_dstate;
1659 
1660     rc = ct3d_qmp_cxl_event_log_enc(log);
1661     if (rc < 0) {
1662         error_setg(errp, "Unhandled error log type");
1663         return;
1664     }
1665     enc_log = rc;
1666 
1667     memset(&dram, 0, sizeof(dram));
1668     cxl_assign_event_header(hdr, &dram_uuid, flags, sizeof(dram),
1669                             cxl_device_get_timestamp(&ct3d->cxl_dstate));
1670     stq_le_p(&dram.phys_addr, dpa);
1671     dram.descriptor = descriptor;
1672     dram.type = type;
1673     dram.transaction_type = transaction_type;
1674 
1675     if (has_channel) {
1676         dram.channel = channel;
1677         valid_flags |= CXL_DRAM_VALID_CHANNEL;
1678     }
1679 
1680     if (has_rank) {
1681         dram.rank = rank;
1682         valid_flags |= CXL_DRAM_VALID_RANK;
1683     }
1684 
1685     if (has_nibble_mask) {
1686         st24_le_p(dram.nibble_mask, nibble_mask);
1687         valid_flags |= CXL_DRAM_VALID_NIBBLE_MASK;
1688     }
1689 
1690     if (has_bank_group) {
1691         dram.bank_group = bank_group;
1692         valid_flags |= CXL_DRAM_VALID_BANK_GROUP;
1693     }
1694 
1695     if (has_bank) {
1696         dram.bank = bank;
1697         valid_flags |= CXL_DRAM_VALID_BANK;
1698     }
1699 
1700     if (has_row) {
1701         st24_le_p(dram.row, row);
1702         valid_flags |= CXL_DRAM_VALID_ROW;
1703     }
1704 
1705     if (has_column) {
1706         stw_le_p(&dram.column, column);
1707         valid_flags |= CXL_DRAM_VALID_COLUMN;
1708     }
1709 
1710     if (has_correction_mask) {
1711         int count = 0;
1712         while (correction_mask && count < 4) {
1713             stq_le_p(&dram.correction_mask[count],
1714                      correction_mask->value);
1715             count++;
1716             correction_mask = correction_mask->next;
1717         }
1718         valid_flags |= CXL_DRAM_VALID_CORRECTION_MASK;
1719     }
1720 
1721     stw_le_p(&dram.validity_flags, valid_flags);
1722 
1723     if (cxl_event_insert(cxlds, enc_log, (CXLEventRecordRaw *)&dram)) {
1724         cxl_event_irq_assert(ct3d);
1725     }
1727 }
1728 
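/*
 * Illustrative QMP usage (a sketch: command and argument names are assumed to
 * follow the definitions in qapi/cxl.json, not shown here; the device path and
 * values are examples only):
 *
 * { "execute": "cxl-inject-memory-module-event",
 *   "arguments": { "path": "/machine/peripheral/cxl-pmem0",
 *                  "log": "informational", "flags": 1, "type": 4,
 *                  "health-status": 1, "media-status": 0,
 *                  "additional-status": 0, "life-used": 33,
 *                  "temperature": 25, "dirty-shutdown-count": 1,
 *                  "corrected-volatile-error-count": 5,
 *                  "corrected-persist-error-count": 2 } }
 */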
1729 void qmp_cxl_inject_memory_module_event(const char *path, CxlEventLog log,
1730                                         uint8_t flags, uint8_t type,
1731                                         uint8_t health_status,
1732                                         uint8_t media_status,
1733                                         uint8_t additional_status,
1734                                         uint8_t life_used,
1735                                         int16_t temperature,
1736                                         uint32_t dirty_shutdown_count,
1737                                         uint32_t corrected_volatile_error_count,
1738                                         uint32_t corrected_persist_error_count,
1739                                         Error **errp)
1740 {
1741     Object *obj = object_resolve_path(path, NULL);
1742     CXLEventMemoryModule module;
1743     CXLEventRecordHdr *hdr = &module.hdr;
1744     CXLDeviceState *cxlds;
1745     CXLType3Dev *ct3d;
1746     uint8_t enc_log;
1747     int rc;
1748 
1749     if (!obj) {
1750         error_setg(errp, "Unable to resolve path");
1751         return;
1752     }
1753     if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) {
1754         error_setg(errp, "Path does not point to a CXL type 3 device");
1755         return;
1756     }
1757     ct3d = CXL_TYPE3(obj);
1758     cxlds = &ct3d->cxl_dstate;
1759 
1760     rc = ct3d_qmp_cxl_event_log_enc(log);
1761     if (rc < 0) {
1762         error_setg(errp, "Unhandled error log type");
1763         return;
1764     }
1765     enc_log = rc;
1766 
1767     memset(&module, 0, sizeof(module));
1768     cxl_assign_event_header(hdr, &memory_module_uuid, flags, sizeof(module),
1769                             cxl_device_get_timestamp(&ct3d->cxl_dstate));
1770 
1771     module.type = type;
1772     module.health_status = health_status;
1773     module.media_status = media_status;
1774     module.additional_status = additional_status;
1775     module.life_used = life_used;
1776     stw_le_p(&module.temperature, temperature);
1777     stl_le_p(&module.dirty_shutdown_count, dirty_shutdown_count);
1778     stl_le_p(&module.corrected_volatile_error_count,
1779              corrected_volatile_error_count);
1780     stl_le_p(&module.corrected_persistent_error_count,
1781              corrected_persist_error_count);
1782 
1783     if (cxl_event_insert(cxlds, enc_log, (CXLEventRecordRaw *)&module)) {
1784         cxl_event_irq_assert(ct3d);
1785     }
1786 }
1787 
1788 /* CXL r3.1 Table 8-50: Dynamic Capacity Event Record */
1789 static const QemuUUID dynamic_capacity_uuid = {
1790     .data = UUID(0xca95afa7, 0xf183, 0x4018, 0x8c, 0x2f,
1791                  0x95, 0x26, 0x8e, 0x10, 0x1a, 0x2a),
1792 };
1793 
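/* Values for the Event Type field of the Dynamic Capacity Event Record */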
1794 typedef enum CXLDCEventType {
1795     DC_EVENT_ADD_CAPACITY = 0x0,
1796     DC_EVENT_RELEASE_CAPACITY = 0x1,
1797     DC_EVENT_FORCED_RELEASE_CAPACITY = 0x2,
1798     DC_EVENT_REGION_CONFIG_UPDATED = 0x3,
1799     DC_EVENT_ADD_CAPACITY_RSP = 0x4,
1800     DC_EVENT_CAPACITY_RELEASED = 0x5,
1801 } CXLDCEventType;
1802 
1803 /*
1804  * Check whether the range [dpa, dpa + len - 1] overlaps any extent in
1805  * the list.
1806  * Return value: true if there is an overlap; otherwise, false.
1807  */
1808 static bool cxl_extents_overlaps_dpa_range(CXLDCExtentList *list,
1809                                            uint64_t dpa, uint64_t len)
1810 {
1811     CXLDCExtent *ent;
1812     Range range1, range2;
1813 
1814     if (!list) {
1815         return false;
1816     }
1817 
1818     range_init_nofail(&range1, dpa, len);
1819     QTAILQ_FOREACH(ent, list, node) {
1820         range_init_nofail(&range2, ent->start_dpa, ent->len);
1821         if (range_overlaps_range(&range1, &range2)) {
1822             return true;
1823         }
1824     }
1825     return false;
1826 }
1827 
1828 /*
1829  * Check whether the range [dpa, dpa + len - 1] is fully contained by an
1830  * extent in the list.
1831  * Will check containment across multiple extents once superset release is added.
1832  * Return value: true if the range is contained; otherwise, false.
1833  */
1834 bool cxl_extents_contains_dpa_range(CXLDCExtentList *list,
1835                                     uint64_t dpa, uint64_t len)
1836 {
1837     CXLDCExtent *ent;
1838     Range range1, range2;
1839 
1840     if (!list) {
1841         return false;
1842     }
1843 
1844     range_init_nofail(&range1, dpa, len);
1845     QTAILQ_FOREACH(ent, list, node) {
1846         range_init_nofail(&range2, ent->start_dpa, ent->len);
1847         if (range_contains_range(&range2, &range1)) {
1848             return true;
1849         }
1850     }
1851     return false;
1852 }
1853 
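/* Check whether the range overlaps any extent in any pending extent group */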
1854 static bool cxl_extent_groups_overlaps_dpa_range(CXLDCExtentGroupList *list,
1855                                                  uint64_t dpa, uint64_t len)
1856 {
1857     CXLDCExtentGroup *group;
1858 
1859     if (!list) {
1860         return false;
1861     }
1862 
1863     QTAILQ_FOREACH(group, list, node) {
1864         if (cxl_extents_overlaps_dpa_range(&group->list, dpa, len)) {
1865             return true;
1866         }
1867     }
1868     return false;
1869 }
1870 
1871 /*
1872  * The main function to process a dynamic capacity event with an extent list.
1873  * Currently only DC extent add/release requests are processed.
1874  */
1875 static void qmp_cxl_process_dynamic_capacity_prescriptive(const char *path,
1876         uint16_t hid, CXLDCEventType type, uint8_t rid,
1877         CxlDynamicCapacityExtentList *records, Error **errp)
1878 {
1879     Object *obj;
1880     CXLEventDynamicCapacity dCap = {};
1881     CXLEventRecordHdr *hdr = &dCap.hdr;
1882     CXLType3Dev *dcd;
1883     uint8_t flags = 1 << CXL_EVENT_TYPE_INFO;
1884     uint32_t num_extents = 0;
1885     CxlDynamicCapacityExtentList *list;
1886     CXLDCExtentGroup *group = NULL;
1887     g_autofree CXLDCExtentRaw *extents = NULL;
1888     uint8_t enc_log = CXL_EVENT_TYPE_DYNAMIC_CAP;
1889     uint64_t dpa, offset, len, block_size;
1890     g_autofree unsigned long *blk_bitmap = NULL;
1891     int i;
1892 
1893     obj = object_resolve_path_type(path, TYPE_CXL_TYPE3, NULL);
1894     if (!obj) {
1895         error_setg(errp, "Unable to resolve CXL type 3 device");
1896         return;
1897     }
1898 
1899     dcd = CXL_TYPE3(obj);
1900     if (!dcd->dc.num_regions) {
1901         error_setg(errp, "No dynamic capacity support from the device");
1902         return;
1903     }
1904
1906     if (rid >= dcd->dc.num_regions) {
1907         error_setg(errp, "region id is too large");
1908         return;
1909     }
1910     block_size = dcd->dc.regions[rid].block_size;
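    /*
     * One bit per region block; used below to reject duplicate or
     * overlapping extents within this request.
     */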
1911     blk_bitmap = bitmap_new(dcd->dc.regions[rid].len / block_size);
1912 
1913     /* Sanity check and count the extents */
1914     list = records;
1915     while (list) {
1916         offset = list->value->offset;
1917         len = list->value->len;
1918         dpa = offset + dcd->dc.regions[rid].base;
1919 
1920         if (len == 0) {
1921             error_setg(errp, "extent with 0 length is not allowed");
1922             return;
1923         }
1924 
1925         if (offset % block_size || len % block_size) {
1926             error_setg(errp, "dpa or len is not aligned to region block size");
1927             return;
1928         }
1929 
1930         if (offset + len > dcd->dc.regions[rid].len) {
1931             error_setg(errp, "extent range is beyond the region end");
1932             return;
1933         }
1934 
1935         /* No duplicate or overlapping extents are allowed */
1936         if (test_any_bits_set(blk_bitmap, offset / block_size,
1937                               len / block_size)) {
1938             error_setg(errp, "duplicate or overlapped extents are detected");
1939             return;
1940         }
1941         bitmap_set(blk_bitmap, offset / block_size, len / block_size);
1942 
1943         if (type == DC_EVENT_RELEASE_CAPACITY) {
1944             if (cxl_extent_groups_overlaps_dpa_range(&dcd->dc.extents_pending,
1945                                                      dpa, len)) {
1946                 error_setg(errp,
1947                            "cannot release extent with pending DPA range");
1948                 return;
1949             }
1950             if (!ct3_test_region_block_backed(dcd, dpa, len)) {
1951                 error_setg(errp,
1952                            "cannot release extent with non-existing DPA range");
1953                 return;
1954             }
1955         } else if (type == DC_EVENT_ADD_CAPACITY) {
1956             if (cxl_extents_overlaps_dpa_range(&dcd->dc.extents, dpa, len)) {
1957                 error_setg(errp,
1958                            "cannot add DPA already accessible to the same LD");
1959                 return;
1960             }
1961             if (cxl_extent_groups_overlaps_dpa_range(&dcd->dc.extents_pending,
1962                                                      dpa, len)) {
1963                 error_setg(errp,
1964                            "cannot add DPA again while still pending");
1965                 return;
1966             }
1967         }
1968         list = list->next;
1969         num_extents++;
1970     }
1971 
1972     /* Create extent list for event being passed to host */
1973     i = 0;
1974     list = records;
1975     extents = g_new0(CXLDCExtentRaw, num_extents);
1976     while (list) {
1977         offset = list->value->offset;
1978         len = list->value->len;
1979         dpa = dcd->dc.regions[rid].base + offset;
1980 
1981         extents[i].start_dpa = dpa;
1982         extents[i].len = len;
1983         memset(extents[i].tag, 0, 0x10);
1984         extents[i].shared_seq = 0;
1985         if (type == DC_EVENT_ADD_CAPACITY) {
1986             group = cxl_insert_extent_to_extent_group(group,
1987                                                       extents[i].start_dpa,
1988                                                       extents[i].len,
1989                                                       extents[i].tag,
1990                                                       extents[i].shared_seq);
1991         }
1992 
1993         list = list->next;
1994         i++;
1995     }
1996     if (group) {
1997         cxl_extent_group_list_insert_tail(&dcd->dc.extents_pending, group);
1998     }
1999 
2000     /*
2001      * CXL r3.1 section 8.2.9.2.1.6: Dynamic Capacity Event Record
2002      *
2003      * All Dynamic Capacity event records shall set the Event Record Severity
2004      * field in the Common Event Record Format to Informational Event. All
2005      * Dynamic Capacity related events shall be logged in the Dynamic Capacity
2006      * Event Log.
2007      */
2008     cxl_assign_event_header(hdr, &dynamic_capacity_uuid, flags, sizeof(dCap),
2009                             cxl_device_get_timestamp(&dcd->cxl_dstate));
2010 
2011     dCap.type = type;
2012     /* FIXME: for now, validity flag is cleared */
2013     dCap.validity_flags = 0;
2014     stw_le_p(&dCap.host_id, hid);
2015     /* only valid for DC_REGION_CONFIG_UPDATED event */
2016     dCap.updated_region_id = 0;
2017     dCap.flags = 0;
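    /*
     * One event record is inserted per extent; the "More" flag (bit 0 of the
     * record flags) is set on all but the last record to indicate that
     * further extent records for this request follow.
     */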
2018     for (i = 0; i < num_extents; i++) {
2019         memcpy(&dCap.dynamic_capacity_extent, &extents[i],
2020                sizeof(CXLDCExtentRaw));
2021 
2022         if (i < num_extents - 1) {
2023             /* Set "More" flag */
2024             dCap.flags |= BIT(0);
2025         }
2026 
2027         if (cxl_event_insert(&dcd->cxl_dstate, enc_log,
2028                              (CXLEventRecordRaw *)&dCap)) {
2029             cxl_event_irq_assert(dcd);
2030         }
2031     }
2032 }
2033 
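/*
 * Illustrative QMP usage (a sketch: command and argument names are assumed to
 * follow the definitions in qapi/cxl.json, not shown here; the device path and
 * extent are examples only):
 *
 * { "execute": "cxl-add-dynamic-capacity",
 *   "arguments": { "path": "/machine/peripheral/cxl-dcd0", "host-id": 0,
 *                  "selection-policy": "prescriptive", "region": 0,
 *                  "extents": [ { "offset": 0, "len": 134217728 } ] } }
 */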
2034 void qmp_cxl_add_dynamic_capacity(const char *path, uint16_t host_id,
2035                                   CxlExtentSelectionPolicy sel_policy,
2036                                   uint8_t region, const char *tag,
2037                                   CxlDynamicCapacityExtentList  *extents,
2038                                   Error **errp)
2039 {
2040     switch (sel_policy) {
2041     case CXL_EXTENT_SELECTION_POLICY_PRESCRIPTIVE:
2042         qmp_cxl_process_dynamic_capacity_prescriptive(path, host_id,
2043                                                       DC_EVENT_ADD_CAPACITY,
2044                                                       region, extents, errp);
2045         return;
2046     default:
2047         error_setg(errp, "Selection policy not supported");
2048         return;
2049     }
2050 }
2051 
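/*
 * Illustrative QMP usage (a sketch: command and argument names are assumed to
 * follow the definitions in qapi/cxl.json, not shown here; the device path and
 * extent are examples only):
 *
 * { "execute": "cxl-release-dynamic-capacity",
 *   "arguments": { "path": "/machine/peripheral/cxl-dcd0", "host-id": 0,
 *                  "removal-policy": "prescriptive", "region": 0,
 *                  "extents": [ { "offset": 0, "len": 134217728 } ] } }
 */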
2052 void qmp_cxl_release_dynamic_capacity(const char *path, uint16_t host_id,
2053                                       CxlExtentRemovalPolicy removal_policy,
2054                                       bool has_forced_removal,
2055                                       bool forced_removal,
2056                                       bool has_sanitize_on_release,
2057                                       bool sanitize_on_release,
2058                                       uint8_t region,
2059                                       const char *tag,
2060                                       CxlDynamicCapacityExtentList  *extents,
2061                                       Error **errp)
2062 {
2063     CXLDCEventType type = DC_EVENT_RELEASE_CAPACITY;
2064 
2065     if (has_forced_removal && forced_removal) {
2066         /* TODO: enable forced removal in the future */
2067         type = DC_EVENT_FORCED_RELEASE_CAPACITY;
2068         error_setg(errp, "Forced removal not supported yet");
2069         return;
2070     }
2071 
2072     switch (removal_policy) {
2073     case CXL_EXTENT_REMOVAL_POLICY_PRESCRIPTIVE:
2074         qmp_cxl_process_dynamic_capacity_prescriptive(path, host_id, type,
2075                                                       region, extents, errp);
2076         return;
2077     default:
2078         error_setg(errp, "Removal policy not supported");
2079         return;
2080     }
2081 }
2082 
2083 static void ct3_class_init(ObjectClass *oc, void *data)
2084 {
2085     DeviceClass *dc = DEVICE_CLASS(oc);
2086     PCIDeviceClass *pc = PCI_DEVICE_CLASS(oc);
2087     CXLType3Class *cvc = CXL_TYPE3_CLASS(oc);
2088 
2089     pc->realize = ct3_realize;
2090     pc->exit = ct3_exit;
2091     pc->class_id = PCI_CLASS_MEMORY_CXL;
2092     pc->vendor_id = PCI_VENDOR_ID_INTEL;
2093     pc->device_id = 0xd93; /* LVF for now */
2094     pc->revision = 1;
2095 
2096     pc->config_write = ct3d_config_write;
2097     pc->config_read = ct3d_config_read;
2098 
2099     set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
2100     dc->desc = "CXL Memory Device (Type 3)";
2101     dc->reset = ct3d_reset;
2102     device_class_set_props(dc, ct3_props);
2103 
2104     cvc->get_lsa_size = get_lsa_size;
2105     cvc->get_lsa = get_lsa;
2106     cvc->set_lsa = set_lsa;
2107     cvc->set_cacheline = set_cacheline;
2108 }
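
/*
 * Illustrative command line instantiation (a sketch: property names follow
 * ct3_props, the backend ids and paths are examples; see
 * docs/system/devices/cxl.rst for a complete topology):
 *
 *   -object memory-backend-file,id=cxl-mem1,share=on,mem-path=/tmp/cxltest.raw,size=256M
 *   -object memory-backend-file,id=cxl-lsa1,share=on,mem-path=/tmp/lsa.raw,size=256M
 *   -device cxl-type3,bus=root_port13,persistent-memdev=cxl-mem1,lsa=cxl-lsa1,id=cxl-pmem0
 */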
2109 
2110 static const TypeInfo ct3d_info = {
2111     .name = TYPE_CXL_TYPE3,
2112     .parent = TYPE_PCI_DEVICE,
2113     .class_size = sizeof(struct CXLType3Class),
2114     .class_init = ct3_class_init,
2115     .instance_size = sizeof(CXLType3Dev),
2116     .interfaces = (InterfaceInfo[]) {
2117         { INTERFACE_CXL_DEVICE },
2118         { INTERFACE_PCIE_DEVICE },
2119         {}
2120     },
2121 };
2122 
2123 static void ct3d_registers(void)
2124 {
2125     type_register_static(&ct3d_info);
2126 }
2127 
2128 type_init(ct3d_registers);
2129