xref: /qemu/hw/mem/cxl_type3.c (revision 3d881164d4fb2b0f6791cf28d9725926b8ded0d6)
1 /*
2  * CXL Type 3 (memory expander) device
3  *
4  * Copyright(C) 2020 Intel Corporation.
5  *
6  * This work is licensed under the terms of the GNU GPL, version 2. See the
7  * COPYING file in the top-level directory.
8  *
10  * SPDX-License-Identifier: GPL-2.0-only
10  */
11 
12 #include "qemu/osdep.h"
13 #include "qemu/units.h"
14 #include "qemu/error-report.h"
15 #include "qapi/qapi-commands-cxl.h"
16 #include "hw/mem/memory-device.h"
17 #include "hw/mem/pc-dimm.h"
18 #include "hw/pci/pci.h"
19 #include "hw/qdev-properties.h"
20 #include "hw/qdev-properties-system.h"
21 #include "qapi/error.h"
22 #include "qemu/log.h"
23 #include "qemu/module.h"
24 #include "qemu/pmem.h"
25 #include "qemu/range.h"
26 #include "qemu/rcu.h"
27 #include "qemu/guest-random.h"
28 #include "system/hostmem.h"
29 #include "system/numa.h"
30 #include "hw/cxl/cxl.h"
31 #include "hw/pci/msix.h"
32 
33 /* type3 device private */
34 enum CXL_T3_MSIX_VECTOR {
35     CXL_T3_MSIX_PCIE_DOE_TABLE_ACCESS = 0,
36     CXL_T3_MSIX_EVENT_START = 2,
37     CXL_T3_MSIX_MBOX = CXL_T3_MSIX_EVENT_START + CXL_EVENT_TYPE_MAX,
38     CXL_T3_MSIX_VECTOR_NR
39 };
40 
41 #define DWORD_BYTE 4
42 #define CXL_CAPACITY_MULTIPLIER   (256 * MiB)
43 
44 /* Default CDAT entries for a memory region */
45 enum {
46     CT3_CDAT_DSMAS,
47     CT3_CDAT_DSLBIS0,
48     CT3_CDAT_DSLBIS1,
49     CT3_CDAT_DSLBIS2,
50     CT3_CDAT_DSLBIS3,
51     CT3_CDAT_DSEMTS,
52     CT3_CDAT_NUM_ENTRIES
53 };
54 
55 static void ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table,
56                                           int dsmad_handle, uint64_t size,
57                                           bool is_pmem, bool is_dynamic,
58                                           uint64_t dpa_base)
59 {
60     CDATDsmas *dsmas;
61     CDATDslbis *dslbis0;
62     CDATDslbis *dslbis1;
63     CDATDslbis *dslbis2;
64     CDATDslbis *dslbis3;
65     CDATDsemts *dsemts;
66 
67     dsmas = g_malloc(sizeof(*dsmas));
68     *dsmas = (CDATDsmas) {
69         .header = {
70             .type = CDAT_TYPE_DSMAS,
71             .length = sizeof(*dsmas),
72         },
73         .DSMADhandle = dsmad_handle,
74         .flags = (is_pmem ? CDAT_DSMAS_FLAG_NV : 0) |
75                  (is_dynamic ? CDAT_DSMAS_FLAG_DYNAMIC_CAP : 0),
76         .DPA_base = dpa_base,
77         .DPA_length = size,
78     };
79 
80     /* For now, no memory-side cache; plausible-ish numbers */
81     dslbis0 = g_malloc(sizeof(*dslbis0));
82     *dslbis0 = (CDATDslbis) {
83         .header = {
84             .type = CDAT_TYPE_DSLBIS,
85             .length = sizeof(*dslbis0),
86         },
87         .handle = dsmad_handle,
88         .flags = HMAT_LB_MEM_MEMORY,
89         .data_type = HMAT_LB_DATA_READ_LATENCY,
90         .entry_base_unit = 10000, /* 10ns base */
91         .entry[0] = 15, /* 150ns */
92     };
93 
94     dslbis1 = g_malloc(sizeof(*dslbis1));
95     *dslbis1 = (CDATDslbis) {
96         .header = {
97             .type = CDAT_TYPE_DSLBIS,
98             .length = sizeof(*dslbis1),
99         },
100         .handle = dsmad_handle,
101         .flags = HMAT_LB_MEM_MEMORY,
102         .data_type = HMAT_LB_DATA_WRITE_LATENCY,
103         .entry_base_unit = 10000,
104         .entry[0] = 25, /* 250ns */
105     };
106 
107     dslbis2 = g_malloc(sizeof(*dslbis2));
108     *dslbis2 = (CDATDslbis) {
109         .header = {
110             .type = CDAT_TYPE_DSLBIS,
111             .length = sizeof(*dslbis2),
112         },
113         .handle = dsmad_handle,
114         .flags = HMAT_LB_MEM_MEMORY,
115         .data_type = HMAT_LB_DATA_READ_BANDWIDTH,
116         .entry_base_unit = 1000, /* GB/s */
117         .entry[0] = 16,
118     };
119 
120     dslbis3 = g_malloc(sizeof(*dslbis3));
121     *dslbis3 = (CDATDslbis) {
122         .header = {
123             .type = CDAT_TYPE_DSLBIS,
124             .length = sizeof(*dslbis3),
125         },
126         .handle = dsmad_handle,
127         .flags = HMAT_LB_MEM_MEMORY,
128         .data_type = HMAT_LB_DATA_WRITE_BANDWIDTH,
129         .entry_base_unit = 1000, /* GB/s */
130         .entry[0] = 16,
131     };
132 
133     dsemts = g_malloc(sizeof(*dsemts));
134     *dsemts = (CDATDsemts) {
135         .header = {
136             .type = CDAT_TYPE_DSEMTS,
137             .length = sizeof(*dsemts),
138         },
139         .DSMAS_handle = dsmad_handle,
140         /*
141          * NV: Reserved - non-volatility is already conveyed by the DSMAS flag
142          * V: EFI_MEMORY_SP
143          */
144         .EFI_memory_type_attr = is_pmem ? 2 : 1,
145         .DPA_offset = 0,
146         .DPA_length = size,
147     };
148 
149     /* Header always at start of structure */
150     cdat_table[CT3_CDAT_DSMAS] = (CDATSubHeader *)dsmas;
151     cdat_table[CT3_CDAT_DSLBIS0] = (CDATSubHeader *)dslbis0;
152     cdat_table[CT3_CDAT_DSLBIS1] = (CDATSubHeader *)dslbis1;
153     cdat_table[CT3_CDAT_DSLBIS2] = (CDATSubHeader *)dslbis2;
154     cdat_table[CT3_CDAT_DSLBIS3] = (CDATSubHeader *)dslbis3;
155     cdat_table[CT3_CDAT_DSEMTS] = (CDATSubHeader *)dsemts;
156 }
157 
158 static int ct3_build_cdat_table(CDATSubHeader ***cdat_table, void *priv)
159 {
160     g_autofree CDATSubHeader **table = NULL;
161     CXLType3Dev *ct3d = priv;
162     MemoryRegion *volatile_mr = NULL, *nonvolatile_mr = NULL;
163     MemoryRegion *dc_mr = NULL;
164     uint64_t vmr_size = 0, pmr_size = 0;
165     int dsmad_handle = 0;
166     int cur_ent = 0;
167     int len = 0;
168 
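    /*
     * Each backing store (the volatile region, the persistent region and
     * every DC region) contributes a full set of CT3_CDAT_NUM_ENTRIES
     * entries to the table: one DSMAS, four DSLBIS entries and one DSEMTS.
     */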
169     if (!ct3d->hostpmem && !ct3d->hostvmem && !ct3d->dc.num_regions) {
170         return 0;
171     }
172 
173     if (ct3d->hostvmem) {
174         volatile_mr = host_memory_backend_get_memory(ct3d->hostvmem);
175         if (!volatile_mr) {
176             return -EINVAL;
177         }
178         len += CT3_CDAT_NUM_ENTRIES;
179         vmr_size = memory_region_size(volatile_mr);
180     }
181 
182     if (ct3d->hostpmem) {
183         nonvolatile_mr = host_memory_backend_get_memory(ct3d->hostpmem);
184         if (!nonvolatile_mr) {
185             return -EINVAL;
186         }
187         len += CT3_CDAT_NUM_ENTRIES;
188         pmr_size = memory_region_size(nonvolatile_mr);
189     }
190 
191     if (ct3d->dc.num_regions) {
192         if (!ct3d->dc.host_dc) {
193             return -EINVAL;
194         }
195         dc_mr = host_memory_backend_get_memory(ct3d->dc.host_dc);
196         if (!dc_mr) {
197             return -EINVAL;
198         }
199         len += CT3_CDAT_NUM_ENTRIES * ct3d->dc.num_regions;
200     }
201 
202     table = g_malloc0(len * sizeof(*table));
203 
204     /* Now fill them in */
205     if (volatile_mr) {
206         ct3_build_cdat_entries_for_mr(table, dsmad_handle++, vmr_size,
207                                       false, false, 0);
208         cur_ent = CT3_CDAT_NUM_ENTRIES;
209     }
210 
211     if (nonvolatile_mr) {
212         uint64_t base = vmr_size;
213         ct3_build_cdat_entries_for_mr(&(table[cur_ent]), dsmad_handle++,
214                                       pmr_size, true, false, base);
215         cur_ent += CT3_CDAT_NUM_ENTRIES;
216     }
217 
218     if (dc_mr) {
219         int i;
220         uint64_t region_base = vmr_size + pmr_size;
221 
222         /*
223          * We assume the dynamic capacity to be volatile for now.
224          * Non-volatile dynamic capacity will be added if needed in the
225          * future.
226          */
227         for (i = 0; i < ct3d->dc.num_regions; i++) {
228             ct3_build_cdat_entries_for_mr(&(table[cur_ent]),
229                                           dsmad_handle++,
230                                           ct3d->dc.regions[i].len,
231                                           false, true, region_base);
232             ct3d->dc.regions[i].dsmadhandle = dsmad_handle - 1;
233 
234             cur_ent += CT3_CDAT_NUM_ENTRIES;
235             region_base += ct3d->dc.regions[i].len;
236         }
237     }
238 
239     assert(len == cur_ent);
240 
241     *cdat_table = g_steal_pointer(&table);
242 
243     return len;
244 }
245 
246 static void ct3_free_cdat_table(CDATSubHeader **cdat_table, int num, void *priv)
247 {
248     int i;
249 
250     for (i = 0; i < num; i++) {
251         g_free(cdat_table[i]);
252     }
253     g_free(cdat_table);
254 }
255 
256 static bool cxl_doe_cdat_rsp(DOECap *doe_cap)
257 {
258     CDATObject *cdat = &CXL_TYPE3(doe_cap->pdev)->cxl_cstate.cdat;
259     uint16_t ent;
260     void *base;
261     uint32_t len;
262     CDATReq *req = pcie_doe_get_write_mbox_ptr(doe_cap);
263     CDATRsp rsp;
264 
265     assert(cdat->entry_len);
266 
267     /* Discard if request length mismatched */
268     if (pcie_doe_get_obj_len(req) <
269         DIV_ROUND_UP(sizeof(CDATReq), DWORD_BYTE)) {
270         return false;
271     }
272 
273     ent = req->entry_handle;
274     base = cdat->entry[ent].base;
275     len = cdat->entry[ent].length;
276 
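    /*
     * Build the response for this single table entry.  entry_handle in the
     * response names the next entry the requester should ask for, or
     * CXL_DOE_TAB_ENT_MAX once the final entry has been returned, which is
     * how completion is signalled to the requester here.
     */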
277     rsp = (CDATRsp) {
278         .header = {
279             .vendor_id = CXL_VENDOR_ID,
280             .data_obj_type = CXL_DOE_TABLE_ACCESS,
281             .reserved = 0x0,
282             .length = DIV_ROUND_UP((sizeof(rsp) + len), DWORD_BYTE),
283         },
284         .rsp_code = CXL_DOE_TAB_RSP,
285         .table_type = CXL_DOE_TAB_TYPE_CDAT,
286         .entry_handle = (ent < cdat->entry_len - 1) ?
287                         ent + 1 : CXL_DOE_TAB_ENT_MAX,
288     };
289 
290     memcpy(doe_cap->read_mbox, &rsp, sizeof(rsp));
291     memcpy(doe_cap->read_mbox + DIV_ROUND_UP(sizeof(rsp), DWORD_BYTE),
292            base, len);
293 
294     doe_cap->read_mbox_len += rsp.header.length;
295 
296     return true;
297 }
298 
299 static uint32_t ct3d_config_read(PCIDevice *pci_dev, uint32_t addr, int size)
300 {
301     CXLType3Dev *ct3d = CXL_TYPE3(pci_dev);
302     uint32_t val;
303 
304     if (pcie_doe_read_config(&ct3d->doe_cdat, addr, size, &val)) {
305         return val;
306     }
307 
308     return pci_default_read_config(pci_dev, addr, size);
309 }
310 
311 static void ct3d_config_write(PCIDevice *pci_dev, uint32_t addr, uint32_t val,
312                               int size)
313 {
314     CXLType3Dev *ct3d = CXL_TYPE3(pci_dev);
315 
316     pcie_doe_write_config(&ct3d->doe_cdat, addr, val, size);
317     pci_default_write_config(pci_dev, addr, val, size);
318     pcie_aer_write_config(pci_dev, addr, val, size);
319 }
320 
321 /*
322  * Null value of all Fs suggested by IEEE RA guidelines for use of
323  * EUI, OUI and CID
324  */
325 #define UI64_NULL ~(0ULL)
326 
327 static void build_dvsecs(CXLType3Dev *ct3d)
328 {
329     CXLComponentState *cxl_cstate = &ct3d->cxl_cstate;
330     uint8_t *dvsec;
331     uint32_t range1_size_hi, range1_size_lo,
332              range1_base_hi = 0, range1_base_lo = 0,
333              range2_size_hi = 0, range2_size_lo = 0,
334              range2_base_hi = 0, range2_base_lo = 0;
335 
336     /*
337      * Volatile memory is mapped at (0x0)
338      * Persistent memory is mapped at (volatile->size)
339      */
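    /*
     * The (2 << 5) | (2 << 2) | 0x3 pattern below packs fields of the Range
     * Size Low register in the CXL Device DVSEC: Memory Info Valid and
     * Memory Active in the low bits, plus the Media Type and Memory Class
     * fields, with bits [31:28] holding the low nibble of the range size.
     * Field layout as understood from the CXL specification; verify against
     * the spec revision in use before changing these values.
     */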
340     if (ct3d->hostvmem) {
341         range1_size_hi = ct3d->hostvmem->size >> 32;
342         range1_size_lo = (2 << 5) | (2 << 2) | 0x3 |
343                          (ct3d->hostvmem->size & 0xF0000000);
344         if (ct3d->hostpmem) {
345             range2_size_hi = ct3d->hostpmem->size >> 32;
346             range2_size_lo = (2 << 5) | (2 << 2) | 0x3 |
347                              (ct3d->hostpmem->size & 0xF0000000);
348         }
349     } else if (ct3d->hostpmem) {
350         range1_size_hi = ct3d->hostpmem->size >> 32;
351         range1_size_lo = (2 << 5) | (2 << 2) | 0x3 |
352                          (ct3d->hostpmem->size & 0xF0000000);
353     } else {
354         /*
355          * For DCD with no static memory, set memory active, memory class bits.
356          * No range is set.
357          */
358         range1_size_hi = 0;
359         range1_size_lo = (2 << 5) | (2 << 2) | 0x3;
360     }
361 
362     dvsec = (uint8_t *)&(CXLDVSECDevice){
363         .cap = 0x1e,
364         .ctrl = 0x2,
365         .status2 = 0x2,
366         .range1_size_hi = range1_size_hi,
367         .range1_size_lo = range1_size_lo,
368         .range1_base_hi = range1_base_hi,
369         .range1_base_lo = range1_base_lo,
370         .range2_size_hi = range2_size_hi,
371         .range2_size_lo = range2_size_lo,
372         .range2_base_hi = range2_base_hi,
373         .range2_base_lo = range2_base_lo,
374     };
375     cxl_component_create_dvsec(cxl_cstate, CXL2_TYPE3_DEVICE,
376                                PCIE_CXL_DEVICE_DVSEC_LENGTH,
377                                PCIE_CXL_DEVICE_DVSEC,
378                                PCIE_CXL31_DEVICE_DVSEC_REVID, dvsec);
379 
380     dvsec = (uint8_t *)&(CXLDVSECRegisterLocator){
381         .rsvd         = 0,
382         .reg0_base_lo = RBI_COMPONENT_REG | CXL_COMPONENT_REG_BAR_IDX,
383         .reg0_base_hi = 0,
384         .reg1_base_lo = RBI_CXL_DEVICE_REG | CXL_DEVICE_REG_BAR_IDX,
385         .reg1_base_hi = 0,
386     };
387     cxl_component_create_dvsec(cxl_cstate, CXL2_TYPE3_DEVICE,
388                                REG_LOC_DVSEC_LENGTH, REG_LOC_DVSEC,
389                                REG_LOC_DVSEC_REVID, dvsec);
390     dvsec = (uint8_t *)&(CXLDVSECDeviceGPF){
391         .phase2_duration = 0x603, /* 3 seconds */
392         .phase2_power = 0x33, /* 0x33 milliwatts */
393     };
394     cxl_component_create_dvsec(cxl_cstate, CXL2_TYPE3_DEVICE,
395                                GPF_DEVICE_DVSEC_LENGTH, GPF_DEVICE_DVSEC,
396                                GPF_DEVICE_DVSEC_REVID, dvsec);
397 
398     dvsec = (uint8_t *)&(CXLDVSECPortFlexBus){
399         .cap                     = 0x26, /* 68B, IO, Mem, non-MLD */
400         .ctrl                    = 0x02, /* IO always enabled */
401         .status                  = 0x26, /* same as capabilities */
402         .rcvd_mod_ts_data_phase1 = 0xef, /* WTF? */
403     };
404     cxl_component_create_dvsec(cxl_cstate, CXL2_TYPE3_DEVICE,
405                                PCIE_CXL3_FLEXBUS_PORT_DVSEC_LENGTH,
406                                PCIE_FLEXBUS_PORT_DVSEC,
407                                PCIE_CXL3_FLEXBUS_PORT_DVSEC_REVID, dvsec);
408 }
409 
410 static void hdm_decoder_commit(CXLType3Dev *ct3d, int which)
411 {
412     int hdm_inc = R_CXL_HDM_DECODER1_BASE_LO - R_CXL_HDM_DECODER0_BASE_LO;
413     ComponentRegisters *cregs = &ct3d->cxl_cstate.crb;
414     uint32_t *cache_mem = cregs->cache_mem_registers;
415     uint32_t ctrl;
416 
417     ctrl = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_CTRL + which * hdm_inc);
418     /* TODO: Sanity-check that the decoder configuration is possible */
419     ctrl = FIELD_DP32(ctrl, CXL_HDM_DECODER0_CTRL, ERR, 0);
420     ctrl = FIELD_DP32(ctrl, CXL_HDM_DECODER0_CTRL, COMMITTED, 1);
421 
422     stl_le_p(cache_mem + R_CXL_HDM_DECODER0_CTRL + which * hdm_inc, ctrl);
423 }
424 
425 static void hdm_decoder_uncommit(CXLType3Dev *ct3d, int which)
426 {
427     int hdm_inc = R_CXL_HDM_DECODER1_BASE_LO - R_CXL_HDM_DECODER0_BASE_LO;
428     ComponentRegisters *cregs = &ct3d->cxl_cstate.crb;
429     uint32_t *cache_mem = cregs->cache_mem_registers;
430     uint32_t ctrl;
431 
432     ctrl = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_CTRL + which * hdm_inc);
433 
434     ctrl = FIELD_DP32(ctrl, CXL_HDM_DECODER0_CTRL, ERR, 0);
435     ctrl = FIELD_DP32(ctrl, CXL_HDM_DECODER0_CTRL, COMMITTED, 0);
436 
437     stl_le_p(cache_mem + R_CXL_HDM_DECODER0_CTRL + which * hdm_inc, ctrl);
438 }
439 
440 static int ct3d_qmp_uncor_err_to_cxl(CxlUncorErrorType qmp_err)
441 {
442     switch (qmp_err) {
443     case CXL_UNCOR_ERROR_TYPE_CACHE_DATA_PARITY:
444         return CXL_RAS_UNC_ERR_CACHE_DATA_PARITY;
445     case CXL_UNCOR_ERROR_TYPE_CACHE_ADDRESS_PARITY:
446         return CXL_RAS_UNC_ERR_CACHE_ADDRESS_PARITY;
447     case CXL_UNCOR_ERROR_TYPE_CACHE_BE_PARITY:
448         return CXL_RAS_UNC_ERR_CACHE_BE_PARITY;
449     case CXL_UNCOR_ERROR_TYPE_CACHE_DATA_ECC:
450         return CXL_RAS_UNC_ERR_CACHE_DATA_ECC;
451     case CXL_UNCOR_ERROR_TYPE_MEM_DATA_PARITY:
452         return CXL_RAS_UNC_ERR_MEM_DATA_PARITY;
453     case CXL_UNCOR_ERROR_TYPE_MEM_ADDRESS_PARITY:
454         return CXL_RAS_UNC_ERR_MEM_ADDRESS_PARITY;
455     case CXL_UNCOR_ERROR_TYPE_MEM_BE_PARITY:
456         return CXL_RAS_UNC_ERR_MEM_BE_PARITY;
457     case CXL_UNCOR_ERROR_TYPE_MEM_DATA_ECC:
458         return CXL_RAS_UNC_ERR_MEM_DATA_ECC;
459     case CXL_UNCOR_ERROR_TYPE_REINIT_THRESHOLD:
460         return CXL_RAS_UNC_ERR_REINIT_THRESHOLD;
461     case CXL_UNCOR_ERROR_TYPE_RSVD_ENCODING:
462         return CXL_RAS_UNC_ERR_RSVD_ENCODING;
463     case CXL_UNCOR_ERROR_TYPE_POISON_RECEIVED:
464         return CXL_RAS_UNC_ERR_POISON_RECEIVED;
465     case CXL_UNCOR_ERROR_TYPE_RECEIVER_OVERFLOW:
466         return CXL_RAS_UNC_ERR_RECEIVER_OVERFLOW;
467     case CXL_UNCOR_ERROR_TYPE_INTERNAL:
468         return CXL_RAS_UNC_ERR_INTERNAL;
469     case CXL_UNCOR_ERROR_TYPE_CXL_IDE_TX:
470         return CXL_RAS_UNC_ERR_CXL_IDE_TX;
471     case CXL_UNCOR_ERROR_TYPE_CXL_IDE_RX:
472         return CXL_RAS_UNC_ERR_CXL_IDE_RX;
473     default:
474         return -EINVAL;
475     }
476 }
477 
478 static int ct3d_qmp_cor_err_to_cxl(CxlCorErrorType qmp_err)
479 {
480     switch (qmp_err) {
481     case CXL_COR_ERROR_TYPE_CACHE_DATA_ECC:
482         return CXL_RAS_COR_ERR_CACHE_DATA_ECC;
483     case CXL_COR_ERROR_TYPE_MEM_DATA_ECC:
484         return CXL_RAS_COR_ERR_MEM_DATA_ECC;
485     case CXL_COR_ERROR_TYPE_CRC_THRESHOLD:
486         return CXL_RAS_COR_ERR_CRC_THRESHOLD;
487     case CXL_COR_ERROR_TYPE_RETRY_THRESHOLD:
488         return CXL_RAS_COR_ERR_RETRY_THRESHOLD;
489     case CXL_COR_ERROR_TYPE_CACHE_POISON_RECEIVED:
490         return CXL_RAS_COR_ERR_CACHE_POISON_RECEIVED;
491     case CXL_COR_ERROR_TYPE_MEM_POISON_RECEIVED:
492         return CXL_RAS_COR_ERR_MEM_POISON_RECEIVED;
493     case CXL_COR_ERROR_TYPE_PHYSICAL:
494         return CXL_RAS_COR_ERR_PHYSICAL;
495     default:
496         return -EINVAL;
497     }
498 }
499 
500 static void ct3d_reg_write(void *opaque, hwaddr offset, uint64_t value,
501                            unsigned size)
502 {
503     CXLComponentState *cxl_cstate = opaque;
504     ComponentRegisters *cregs = &cxl_cstate->crb;
505     CXLType3Dev *ct3d = container_of(cxl_cstate, CXLType3Dev, cxl_cstate);
506     uint32_t *cache_mem = cregs->cache_mem_registers;
507     bool should_commit = false;
508     bool should_uncommit = false;
509     int which_hdm = -1;
510 
511     assert(size == 4);
512     g_assert(offset < CXL2_COMPONENT_CM_REGION_SIZE);
513 
514     switch (offset) {
515     case A_CXL_HDM_DECODER0_CTRL:
516         should_commit = FIELD_EX32(value, CXL_HDM_DECODER0_CTRL, COMMIT);
517         should_uncommit = !should_commit;
518         which_hdm = 0;
519         break;
520     case A_CXL_HDM_DECODER1_CTRL:
521         should_commit = FIELD_EX32(value, CXL_HDM_DECODER0_CTRL, COMMIT);
522         should_uncommit = !should_commit;
523         which_hdm = 1;
524         break;
525     case A_CXL_HDM_DECODER2_CTRL:
526         should_commit = FIELD_EX32(value, CXL_HDM_DECODER0_CTRL, COMMIT);
527         should_uncommit = !should_commit;
528         which_hdm = 2;
529         break;
530     case A_CXL_HDM_DECODER3_CTRL:
531         should_commit = FIELD_EX32(value, CXL_HDM_DECODER0_CTRL, COMMIT);
532         should_uncommit = !should_commit;
533         which_hdm = 3;
534         break;
535     case A_CXL_RAS_UNC_ERR_STATUS:
536     {
537         uint32_t capctrl = ldl_le_p(cache_mem + R_CXL_RAS_ERR_CAP_CTRL);
538         uint32_t fe = FIELD_EX32(capctrl, CXL_RAS_ERR_CAP_CTRL,
539                                  FIRST_ERROR_POINTER);
540         CXLError *cxl_err;
541         uint32_t unc_err;
542 
543         /*
544          * If the single bit written corresponds to the first error
545          * pointer, clear that error and update the status and header log.
546          */
547         if (!QTAILQ_EMPTY(&ct3d->error_list)) {
548             if ((1 << fe) ^ value) {
549                 CXLError *cxl_next;
550                 /*
551                  * Software is using the wrong flow for multiple header
552                  * recording. Follow the behavior in PCIe r6.0, assuming
553                  * multiple header support. Implementation defined choice to
554                  * clear all matching records if more than one bit is set -
555                  * which corresponds most closely to the behavior of hardware
556                  * not capable of multiple header recording.
557                  */
558                 QTAILQ_FOREACH_SAFE(cxl_err, &ct3d->error_list, node,
559                                     cxl_next) {
560                     if ((1 << cxl_err->type) & value) {
561                         QTAILQ_REMOVE(&ct3d->error_list, cxl_err, node);
562                         g_free(cxl_err);
563                     }
564                 }
565             } else {
566                 /* Done with previous FE, so drop from list */
567                 cxl_err = QTAILQ_FIRST(&ct3d->error_list);
568                 QTAILQ_REMOVE(&ct3d->error_list, cxl_err, node);
569                 g_free(cxl_err);
570             }
571 
572             /*
573              * If there is another FE, then put that in place and update
574              * the header log
575              */
576             if (!QTAILQ_EMPTY(&ct3d->error_list)) {
577                 uint32_t *header_log = &cache_mem[R_CXL_RAS_ERR_HEADER0];
578                 int i;
579 
580                 cxl_err = QTAILQ_FIRST(&ct3d->error_list);
581                 for (i = 0; i < CXL_RAS_ERR_HEADER_NUM; i++) {
582                     stl_le_p(header_log + i, cxl_err->header[i]);
583                 }
584                 capctrl = FIELD_DP32(capctrl, CXL_RAS_ERR_CAP_CTRL,
585                                      FIRST_ERROR_POINTER, cxl_err->type);
586             } else {
587                 /*
588                  * If no more errors, then follow recommendation of PCI spec
589                  * r6.0 6.2.4.2 to set the first error pointer to a status
590                  * bit that will never be used.
591                  */
592                 capctrl = FIELD_DP32(capctrl, CXL_RAS_ERR_CAP_CTRL,
593                                      FIRST_ERROR_POINTER,
594                                      CXL_RAS_UNC_ERR_CXL_UNUSED);
595             }
596             stl_le_p((uint8_t *)cache_mem + A_CXL_RAS_ERR_CAP_CTRL, capctrl);
597         }
598         unc_err = 0;
599         QTAILQ_FOREACH(cxl_err, &ct3d->error_list, node) {
600             unc_err |= 1 << cxl_err->type;
601         }
602         stl_le_p((uint8_t *)cache_mem + offset, unc_err);
603 
604         return;
605     }
606     case A_CXL_RAS_COR_ERR_STATUS:
607     {
608         uint32_t rw1c = value;
609         uint32_t temp = ldl_le_p((uint8_t *)cache_mem + offset);
610         temp &= ~rw1c;
611         stl_le_p((uint8_t *)cache_mem + offset, temp);
612         return;
613     }
614     default:
615         break;
616     }
617 
618     stl_le_p((uint8_t *)cache_mem + offset, value);
619     if (should_commit) {
620         hdm_decoder_commit(ct3d, which_hdm);
621     } else if (should_uncommit) {
622         hdm_decoder_uncommit(ct3d, which_hdm);
623     }
624 }
625 
626 /*
627  * TODO: dc region configuration will be updated once host backend and address
628  * space support is added for DCD.
629  */
630 static bool cxl_create_dc_regions(CXLType3Dev *ct3d, Error **errp)
631 {
632     int i;
633     uint64_t region_base = 0;
634     uint64_t region_len;
635     uint64_t decode_len;
636     uint64_t blk_size = 2 * MiB;
637     CXLDCRegion *region;
638     MemoryRegion *mr;
639     uint64_t dc_size;
640 
641     mr = host_memory_backend_get_memory(ct3d->dc.host_dc);
642     dc_size = memory_region_size(mr);
643     region_len = DIV_ROUND_UP(dc_size, ct3d->dc.num_regions);
644 
645     if (dc_size % (ct3d->dc.num_regions * CXL_CAPACITY_MULTIPLIER) != 0) {
646         error_setg(errp,
647                    "backend size is not a multiple of region length: 0x%" PRIx64,
648                    region_len);
649         return false;
650     }
651     if (region_len % CXL_CAPACITY_MULTIPLIER != 0) {
652         error_setg(errp, "DC region size is not aligned to 0x%" PRIx64,
653                    CXL_CAPACITY_MULTIPLIER);
654         return false;
655     }
656     decode_len = region_len;
657 
658     if (ct3d->hostvmem) {
659         mr = host_memory_backend_get_memory(ct3d->hostvmem);
660         region_base += memory_region_size(mr);
661     }
662     if (ct3d->hostpmem) {
663         mr = host_memory_backend_get_memory(ct3d->hostpmem);
664         region_base += memory_region_size(mr);
665     }
666     if (region_base % CXL_CAPACITY_MULTIPLIER != 0) {
667         error_setg(errp, "DC region base not aligned to 0x%" PRIx64,
668                    CXL_CAPACITY_MULTIPLIER);
669         return false;
670     }
671 
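    /*
     * DC regions are laid out contiguously in DPA space directly after any
     * static volatile and persistent capacity, each spanning region_len
     * bytes with a fixed 2 MiB block size (a simplification, see the TODO
     * above).
     */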
672     for (i = 0, region = &ct3d->dc.regions[0];
673          i < ct3d->dc.num_regions;
674          i++, region++, region_base += region_len) {
675         *region = (CXLDCRegion) {
676             .base = region_base,
677             .decode_len = decode_len,
678             .len = region_len,
679             .block_size = blk_size,
680             /* dsmad_handle set when creating CDAT table entries */
681             .flags = 0,
682         };
683         ct3d->dc.total_capacity += region->len;
684         region->blk_bitmap = bitmap_new(region->len / region->block_size);
685     }
686     QTAILQ_INIT(&ct3d->dc.extents);
687     QTAILQ_INIT(&ct3d->dc.extents_pending);
688 
689     return true;
690 }
691 
692 static void cxl_destroy_dc_regions(CXLType3Dev *ct3d)
693 {
694     CXLDCExtent *ent, *ent_next;
695     CXLDCExtentGroup *group, *group_next;
696     int i;
697     CXLDCRegion *region;
698 
699     QTAILQ_FOREACH_SAFE(ent, &ct3d->dc.extents, node, ent_next) {
700         cxl_remove_extent_from_extent_list(&ct3d->dc.extents, ent);
701     }
702 
703     QTAILQ_FOREACH_SAFE(group, &ct3d->dc.extents_pending, node, group_next) {
704         QTAILQ_REMOVE(&ct3d->dc.extents_pending, group, node);
705         QTAILQ_FOREACH_SAFE(ent, &group->list, node, ent_next) {
706             cxl_remove_extent_from_extent_list(&group->list, ent);
707         }
708         g_free(group);
709     }
710 
711     for (i = 0; i < ct3d->dc.num_regions; i++) {
712         region = &ct3d->dc.regions[i];
713         g_free(region->blk_bitmap);
714     }
715 }
716 
717 static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
718 {
719     DeviceState *ds = DEVICE(ct3d);
720 
721     if (!ct3d->hostmem && !ct3d->hostvmem && !ct3d->hostpmem
722         && !ct3d->dc.num_regions) {
723         error_setg(errp, "at least one memdev property must be set");
724         return false;
725     } else if (ct3d->hostmem && ct3d->hostpmem) {
726         error_setg(errp, "[memdev] cannot be used with new "
727                          "[persistent-memdev] property");
728         return false;
729     } else if (ct3d->hostmem) {
730         /* Use of hostmem property implies pmem */
731         ct3d->hostpmem = ct3d->hostmem;
732         ct3d->hostmem = NULL;
733     }
734 
735     if (ct3d->hostpmem && !ct3d->lsa) {
736         error_setg(errp, "lsa property must be set for persistent devices");
737         return false;
738     }
739 
740     if (ct3d->hostvmem) {
741         MemoryRegion *vmr;
742         char *v_name;
743 
744         vmr = host_memory_backend_get_memory(ct3d->hostvmem);
745         if (!vmr) {
746             error_setg(errp, "volatile memdev must have backing device");
747             return false;
748         }
749         if (host_memory_backend_is_mapped(ct3d->hostvmem)) {
750             error_setg(errp, "memory backend %s can't be used multiple times.",
751                object_get_canonical_path_component(OBJECT(ct3d->hostvmem)));
752             return false;
753         }
754         memory_region_set_nonvolatile(vmr, false);
755         memory_region_set_enabled(vmr, true);
756         host_memory_backend_set_mapped(ct3d->hostvmem, true);
757         if (ds->id) {
758             v_name = g_strdup_printf("cxl-type3-dpa-vmem-space:%s", ds->id);
759         } else {
760             v_name = g_strdup("cxl-type3-dpa-vmem-space");
761         }
762         address_space_init(&ct3d->hostvmem_as, vmr, v_name);
763         ct3d->cxl_dstate.vmem_size = memory_region_size(vmr);
764         ct3d->cxl_dstate.static_mem_size += memory_region_size(vmr);
765         g_free(v_name);
766     }
767 
768     if (ct3d->hostpmem) {
769         MemoryRegion *pmr;
770         char *p_name;
771 
772         pmr = host_memory_backend_get_memory(ct3d->hostpmem);
773         if (!pmr) {
774             error_setg(errp, "persistent memdev must have backing device");
775             return false;
776         }
777         if (host_memory_backend_is_mapped(ct3d->hostpmem)) {
778             error_setg(errp, "memory backend %s can't be used multiple times.",
779                object_get_canonical_path_component(OBJECT(ct3d->hostpmem)));
780             return false;
781         }
782         memory_region_set_nonvolatile(pmr, true);
783         memory_region_set_enabled(pmr, true);
784         host_memory_backend_set_mapped(ct3d->hostpmem, true);
785         if (ds->id) {
786             p_name = g_strdup_printf("cxl-type3-dpa-pmem-space:%s", ds->id);
787         } else {
788             p_name = g_strdup("cxl-type3-dpa-pmem-space");
789         }
790         address_space_init(&ct3d->hostpmem_as, pmr, p_name);
791         ct3d->cxl_dstate.pmem_size = memory_region_size(pmr);
792         ct3d->cxl_dstate.static_mem_size += memory_region_size(pmr);
793         g_free(p_name);
794     }
795 
796     ct3d->dc.total_capacity = 0;
797     if (ct3d->dc.num_regions > 0) {
798         MemoryRegion *dc_mr;
799         char *dc_name;
800 
801         if (!ct3d->dc.host_dc) {
802             error_setg(errp, "dynamic capacity must have a backing device");
803             return false;
804         }
805 
806         dc_mr = host_memory_backend_get_memory(ct3d->dc.host_dc);
807         if (!dc_mr) {
808             error_setg(errp, "dynamic capacity must have a backing device");
809             return false;
810         }
811 
812         if (host_memory_backend_is_mapped(ct3d->dc.host_dc)) {
813             error_setg(errp, "memory backend %s can't be used multiple times.",
814                object_get_canonical_path_component(OBJECT(ct3d->dc.host_dc)));
815             return false;
816         }
817         /*
818          * Set DC regions as volatile for now, non-volatile support can
819          * be added in the future if needed.
820          */
821         memory_region_set_nonvolatile(dc_mr, false);
822         memory_region_set_enabled(dc_mr, true);
823         host_memory_backend_set_mapped(ct3d->dc.host_dc, true);
824         if (ds->id) {
825             dc_name = g_strdup_printf("cxl-dcd-dpa-dc-space:%s", ds->id);
826         } else {
827             dc_name = g_strdup("cxl-dcd-dpa-dc-space");
828         }
829         address_space_init(&ct3d->dc.host_dc_as, dc_mr, dc_name);
830         g_free(dc_name);
831 
832         if (!cxl_create_dc_regions(ct3d, errp)) {
833             error_append_hint(errp, "Failed to set up DC regions\n");
834             return false;
835         }
836     }
837 
838     return true;
839 }
840 
841 static DOEProtocol doe_cdat_prot[] = {
842     { CXL_VENDOR_ID, CXL_DOE_TABLE_ACCESS, cxl_doe_cdat_rsp },
843     { }
844 };
845 
846 static void ct3_realize(PCIDevice *pci_dev, Error **errp)
847 {
848     ERRP_GUARD();
849     CXLType3Dev *ct3d = CXL_TYPE3(pci_dev);
850     CXLComponentState *cxl_cstate = &ct3d->cxl_cstate;
851     ComponentRegisters *regs = &cxl_cstate->crb;
852     MemoryRegion *mr = &regs->component_registers;
853     uint8_t *pci_conf = pci_dev->config;
854     int i, rc;
855     uint16_t count;
856 
857     QTAILQ_INIT(&ct3d->error_list);
858 
859     if (!cxl_setup_memory(ct3d, errp)) {
860         return;
861     }
862 
863     pci_config_set_prog_interface(pci_conf, 0x10);
864 
865     pcie_endpoint_cap_init(pci_dev, 0x80);
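    /*
     * If a serial number is provided, the PCIe Device Serial Number extended
     * capability (4 byte header plus 8 byte serial number, 0xc in total) is
     * placed at 0x100, so the CXL DVSECs start immediately after it;
     * otherwise the DVSECs start at 0x100.
     */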
866     if (ct3d->sn != UI64_NULL) {
867         pcie_dev_ser_num_init(pci_dev, 0x100, ct3d->sn);
868         cxl_cstate->dvsec_offset = 0x100 + 0x0c;
869     } else {
870         cxl_cstate->dvsec_offset = 0x100;
871     }
872 
873     ct3d->cxl_cstate.pdev = pci_dev;
874     build_dvsecs(ct3d);
875 
876     regs->special_ops = g_new0(MemoryRegionOps, 1);
877     regs->special_ops->write = ct3d_reg_write;
878 
879     cxl_component_register_block_init(OBJECT(pci_dev), cxl_cstate,
880                                       TYPE_CXL_TYPE3);
881 
882     pci_register_bar(
883         pci_dev, CXL_COMPONENT_REG_BAR_IDX,
884         PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64, mr);
885 
886     cxl_device_register_block_init(OBJECT(pci_dev), &ct3d->cxl_dstate,
887                                    &ct3d->cci);
888     pci_register_bar(pci_dev, CXL_DEVICE_REG_BAR_IDX,
889                      PCI_BASE_ADDRESS_SPACE_MEMORY |
890                          PCI_BASE_ADDRESS_MEM_TYPE_64,
891                      &ct3d->cxl_dstate.device_registers);
892 
893     /* MSI(-X) Initialization */
894     rc = msix_init_exclusive_bar(pci_dev, CXL_T3_MSIX_VECTOR_NR, 4, errp);
895     if (rc) {
896         goto err_free_special_ops;
897     }
898     for (i = 0; i < CXL_T3_MSIX_VECTOR_NR; i++) {
899         msix_vector_use(pci_dev, i);
900     }
901 
902     /* DOE Initialization */
903     pcie_doe_init(pci_dev, &ct3d->doe_cdat, 0x190, doe_cdat_prot, true,
904                   CXL_T3_MSIX_PCIE_DOE_TABLE_ACCESS);
905 
906     cxl_cstate->cdat.build_cdat_table = ct3_build_cdat_table;
907     cxl_cstate->cdat.free_cdat_table = ct3_free_cdat_table;
908     cxl_cstate->cdat.private = ct3d;
909     if (!cxl_doe_cdat_init(cxl_cstate, errp)) {
910         goto err_msix_uninit;
911     }
912 
913     pcie_cap_deverr_init(pci_dev);
914     /* Leave a bit of room for expansion */
915     rc = pcie_aer_init(pci_dev, PCI_ERR_VER, 0x200, PCI_ERR_SIZEOF, errp);
916     if (rc) {
917         goto err_release_cdat;
918     }
919     cxl_event_init(&ct3d->cxl_dstate, CXL_T3_MSIX_EVENT_START);
920 
921     /* Set default value for patrol scrub attributes */
922     ct3d->patrol_scrub_attrs.scrub_cycle_cap =
923                            CXL_MEMDEV_PS_SCRUB_CYCLE_CHANGE_CAP_DEFAULT |
924                            CXL_MEMDEV_PS_SCRUB_REALTIME_REPORT_CAP_DEFAULT;
925     ct3d->patrol_scrub_attrs.scrub_cycle =
926                            CXL_MEMDEV_PS_CUR_SCRUB_CYCLE_DEFAULT |
927                            (CXL_MEMDEV_PS_MIN_SCRUB_CYCLE_DEFAULT << 8);
928     ct3d->patrol_scrub_attrs.scrub_flags = CXL_MEMDEV_PS_ENABLE_DEFAULT;
929 
930     /* Set default value for DDR5 ECS read attributes */
931     ct3d->ecs_attrs.ecs_log_cap = CXL_ECS_LOG_ENTRY_TYPE_DEFAULT;
932     for (count = 0; count < CXL_ECS_NUM_MEDIA_FRUS; count++) {
933         ct3d->ecs_attrs.fru_attrs[count].ecs_cap =
934                             CXL_ECS_REALTIME_REPORT_CAP_DEFAULT;
935         ct3d->ecs_attrs.fru_attrs[count].ecs_config =
936                             CXL_ECS_THRESHOLD_COUNT_DEFAULT |
937                             (CXL_ECS_MODE_DEFAULT << 3);
938         /* Reserved */
939         ct3d->ecs_attrs.fru_attrs[count].ecs_flags = 0;
940     }
941 
942     return;
943 
944 err_release_cdat:
945     cxl_doe_cdat_release(cxl_cstate);
946 err_msix_uninit:
947     msix_uninit_exclusive_bar(pci_dev);
948 err_free_special_ops:
949     g_free(regs->special_ops);
950     if (ct3d->dc.host_dc) {
951         cxl_destroy_dc_regions(ct3d);
952         address_space_destroy(&ct3d->dc.host_dc_as);
953     }
954     if (ct3d->hostpmem) {
955         address_space_destroy(&ct3d->hostpmem_as);
956     }
957     if (ct3d->hostvmem) {
958         address_space_destroy(&ct3d->hostvmem_as);
959     }
960 }
961 
962 static void ct3_exit(PCIDevice *pci_dev)
963 {
964     CXLType3Dev *ct3d = CXL_TYPE3(pci_dev);
965     CXLComponentState *cxl_cstate = &ct3d->cxl_cstate;
966     ComponentRegisters *regs = &cxl_cstate->crb;
967 
968     pcie_aer_exit(pci_dev);
969     cxl_doe_cdat_release(cxl_cstate);
970     msix_uninit_exclusive_bar(pci_dev);
971     g_free(regs->special_ops);
972     if (ct3d->dc.host_dc) {
973         cxl_destroy_dc_regions(ct3d);
974         address_space_destroy(&ct3d->dc.host_dc_as);
975     }
976     if (ct3d->hostpmem) {
977         address_space_destroy(&ct3d->hostpmem_as);
978     }
979     if (ct3d->hostvmem) {
980         address_space_destroy(&ct3d->hostvmem_as);
981     }
982 }
983 
984 /*
985  * Mark the DPA range [dpa, dpa + len - 1] to be backed and accessible. This
986  * happens when a DC extent is added and accepted by the host.
987  */
988 void ct3_set_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
989                                  uint64_t len)
990 {
991     CXLDCRegion *region;
992 
993     region = cxl_find_dc_region(ct3d, dpa, len);
994     if (!region) {
995         return;
996     }
997 
998     bitmap_set(region->blk_bitmap, (dpa - region->base) / region->block_size,
999                len / region->block_size);
1000 }
1001 
1002 /*
1003  * Check whether the DPA range [dpa, dpa + len - 1] is backed with DC extents.
1004  * Used when validating read/write to dc regions
1005  */
1006 bool ct3_test_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
1007                                   uint64_t len)
1008 {
1009     CXLDCRegion *region;
1010     uint64_t nbits;
1011     long nr;
1012 
1013     region = cxl_find_dc_region(ct3d, dpa, len);
1014     if (!region) {
1015         return false;
1016     }
1017 
1018     nr = (dpa - region->base) / region->block_size;
1019     nbits = DIV_ROUND_UP(len, region->block_size);
1020     /*
1021      * if bits between [dpa, dpa + len) are all 1s, meaning the DPA range is
1022      * backed with DC extents, return true; else return false.
1023      */
1024     return find_next_zero_bit(region->blk_bitmap, nr + nbits, nr) == nr + nbits;
1025 }
1026 
1027 /*
1028  * Mark the DPA range [dpa, dpa + len - 1] to be unbacked and inaccessible.
1029  * This happens when a DC extent is released by the host.
1030  */
1031 void ct3_clear_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
1032                                    uint64_t len)
1033 {
1034     CXLDCRegion *region;
1035     uint64_t nbits;
1036     long nr;
1037 
1038     region = cxl_find_dc_region(ct3d, dpa, len);
1039     if (!region) {
1040         return;
1041     }
1042 
1043     nr = (dpa - region->base) / region->block_size;
1044     nbits = len / region->block_size;
1045     bitmap_clear(region->blk_bitmap, nr, nbits);
1046 }
1047 
1048 static bool cxl_type3_dpa(CXLType3Dev *ct3d, hwaddr host_addr, uint64_t *dpa)
1049 {
1050     int hdm_inc = R_CXL_HDM_DECODER1_BASE_LO - R_CXL_HDM_DECODER0_BASE_LO;
1051     uint32_t *cache_mem = ct3d->cxl_cstate.crb.cache_mem_registers;
1052     unsigned int hdm_count;
1053     uint32_t cap;
1054     uint64_t dpa_base = 0;
1055     int i;
1056 
1057     cap = ldl_le_p(cache_mem + R_CXL_HDM_DECODER_CAPABILITY);
1058     hdm_count = cxl_decoder_count_dec(FIELD_EX32(cap,
1059                                                  CXL_HDM_DECODER_CAPABILITY,
1060                                                  DECODER_COUNT));
1061 
1062     for (i = 0; i < hdm_count; i++) {
1063         uint64_t decoder_base, decoder_size, hpa_offset, skip;
1064         uint32_t hdm_ctrl, low, high;
1065         int ig, iw;
1066 
1067         low = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_BASE_LO + i * hdm_inc);
1068         high = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_BASE_HI + i * hdm_inc);
1069         decoder_base = ((uint64_t)high << 32) | (low & 0xf0000000);
1070 
1071         low = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_SIZE_LO + i * hdm_inc);
1072         high = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_SIZE_HI + i * hdm_inc);
1073         decoder_size = ((uint64_t)high << 32) | (low & 0xf0000000);
1074 
1075         low = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_DPA_SKIP_LO +
1076                        i * hdm_inc);
1077         high = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_DPA_SKIP_HI +
1078                         i * hdm_inc);
1079         skip = ((uint64_t)high << 32) | (low & 0xf0000000);
1080         dpa_base += skip;
1081 
1082         hpa_offset = (uint64_t)host_addr - decoder_base;
1083 
1084         hdm_ctrl = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_CTRL + i * hdm_inc);
1085         iw = FIELD_EX32(hdm_ctrl, CXL_HDM_DECODER0_CTRL, IW);
1086         ig = FIELD_EX32(hdm_ctrl, CXL_HDM_DECODER0_CTRL, IG);
1087         if (!FIELD_EX32(hdm_ctrl, CXL_HDM_DECODER0_CTRL, COMMITTED)) {
1088             return false;
1089         }
1090         if (((uint64_t)host_addr < decoder_base) ||
1091             (hpa_offset >= decoder_size)) {
1092             int decoded_iw = cxl_interleave_ways_dec(iw, &error_fatal);
1093 
1094             if (decoded_iw == 0) {
1095                 return false;
1096             }
1097 
1098             dpa_base += decoder_size / decoded_iw;
1099             continue;
1100         }
1101 
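        /*
         * Recover the DPA from the HPA offset.  For power-of-2 interleave
         * ways (IW encodings below 8) the interleave bits are simply removed
         * from the offset.  The other branch appears to handle the 3, 6 and
         * 12 way encodings, where the position within the interleave set is
         * recovered by dividing the upper bits by 3; this reading of the HDM
         * decoder maths should be checked against the CXL specification
         * before modifying it.
         */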
1102         if (iw < 8) {
1103             *dpa = dpa_base +
1104                 ((MAKE_64BIT_MASK(0, 8 + ig) & hpa_offset) |
1105                  ((MAKE_64BIT_MASK(8 + ig + iw, 64 - 8 - ig - iw) & hpa_offset)
1106                   >> iw));
1107         } else {
1108             *dpa = dpa_base +
1109                 ((MAKE_64BIT_MASK(0, 8 + ig) & hpa_offset) |
1110                  ((((MAKE_64BIT_MASK(ig + iw, 64 - ig - iw) & hpa_offset)
1111                    >> (ig + iw)) / 3) << (ig + 8)));
1112         }
1113 
1114         return true;
1115     }
1116     return false;
1117 }
1118 
1119 static int cxl_type3_hpa_to_as_and_dpa(CXLType3Dev *ct3d,
1120                                        hwaddr host_addr,
1121                                        unsigned int size,
1122                                        AddressSpace **as,
1123                                        uint64_t *dpa_offset)
1124 {
1125     MemoryRegion *vmr = NULL, *pmr = NULL, *dc_mr = NULL;
1126     uint64_t vmr_size = 0, pmr_size = 0, dc_size = 0;
1127 
1128     if (ct3d->hostvmem) {
1129         vmr = host_memory_backend_get_memory(ct3d->hostvmem);
1130         vmr_size = memory_region_size(vmr);
1131     }
1132     if (ct3d->hostpmem) {
1133         pmr = host_memory_backend_get_memory(ct3d->hostpmem);
1134         pmr_size = memory_region_size(pmr);
1135     }
1136     if (ct3d->dc.host_dc) {
1137         dc_mr = host_memory_backend_get_memory(ct3d->dc.host_dc);
1138         dc_size = memory_region_size(dc_mr);
1139     }
1140 
1141     if (!vmr && !pmr && !dc_mr) {
1142         return -ENODEV;
1143     }
1144 
1145     if (!cxl_type3_dpa(ct3d, host_addr, dpa_offset)) {
1146         return -EINVAL;
1147     }
1148 
1149     if (*dpa_offset >= vmr_size + pmr_size + dc_size) {
1150         return -EINVAL;
1151     }
1152 
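    /*
     * DPA space is ordered as volatile, then persistent, then dynamic
     * capacity, matching the layout used when the CDAT entries and DC
     * regions are constructed.
     */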
1153     if (*dpa_offset < vmr_size) {
1154         *as = &ct3d->hostvmem_as;
1155     } else if (*dpa_offset < vmr_size + pmr_size) {
1156         *as = &ct3d->hostpmem_as;
1157         *dpa_offset -= vmr_size;
1158     } else {
1159         if (!ct3_test_region_block_backed(ct3d, *dpa_offset, size)) {
1160             return -ENODEV;
1161         }
1162 
1163         *as = &ct3d->dc.host_dc_as;
1164         *dpa_offset -= (vmr_size + pmr_size);
1165     }
1166 
1167     return 0;
1168 }
1169 
1170 MemTxResult cxl_type3_read(PCIDevice *d, hwaddr host_addr, uint64_t *data,
1171                            unsigned size, MemTxAttrs attrs)
1172 {
1173     CXLType3Dev *ct3d = CXL_TYPE3(d);
1174     uint64_t dpa_offset = 0;
1175     AddressSpace *as = NULL;
1176     int res;
1177 
1178     res = cxl_type3_hpa_to_as_and_dpa(ct3d, host_addr, size,
1179                                       &as, &dpa_offset);
1180     if (res) {
1181         return MEMTX_ERROR;
1182     }
1183 
1184     if (cxl_dev_media_disabled(&ct3d->cxl_dstate)) {
1185         qemu_guest_getrandom_nofail(data, size);
1186         return MEMTX_OK;
1187     }
1188 
1189     return address_space_read(as, dpa_offset, attrs, data, size);
1190 }
1191 
1192 MemTxResult cxl_type3_write(PCIDevice *d, hwaddr host_addr, uint64_t data,
1193                             unsigned size, MemTxAttrs attrs)
1194 {
1195     CXLType3Dev *ct3d = CXL_TYPE3(d);
1196     uint64_t dpa_offset = 0;
1197     AddressSpace *as = NULL;
1198     int res;
1199 
1200     res = cxl_type3_hpa_to_as_and_dpa(ct3d, host_addr, size,
1201                                       &as, &dpa_offset);
1202     if (res) {
1203         return MEMTX_ERROR;
1204     }
1205 
1206     if (cxl_dev_media_disabled(&ct3d->cxl_dstate)) {
1207         return MEMTX_OK;
1208     }
1209 
1210     return address_space_write(as, dpa_offset, attrs, &data, size);
1211 }
1212 
1213 static void ct3d_reset(DeviceState *dev)
1214 {
1215     CXLType3Dev *ct3d = CXL_TYPE3(dev);
1216     uint32_t *reg_state = ct3d->cxl_cstate.crb.cache_mem_registers;
1217     uint32_t *write_msk = ct3d->cxl_cstate.crb.cache_mem_regs_write_mask;
1218 
1219     pcie_cap_fill_link_ep_usp(PCI_DEVICE(dev), ct3d->width, ct3d->speed);
1220     cxl_component_register_init_common(reg_state, write_msk, CXL2_TYPE3_DEVICE);
1221     cxl_device_register_init_t3(ct3d, CXL_T3_MSIX_MBOX);
1222 
1223     /*
1224      * Bring up an endpoint to target with MCTP over VDM.
1225      * This device is emulating an MLD with single LD for now.
1226      */
1227     cxl_initialize_t3_fm_owned_ld_mctpcci(&ct3d->vdm_fm_owned_ld_mctp_cci,
1228                                           DEVICE(ct3d), DEVICE(ct3d),
1229                                           512); /* Max payload made up */
1230     cxl_initialize_t3_ld_cci(&ct3d->ld0_cci, DEVICE(ct3d), DEVICE(ct3d),
1231                              512); /* Max payload made up */
1232 
1233 }
1234 
1235 static const Property ct3_props[] = {
1236     DEFINE_PROP_LINK("memdev", CXLType3Dev, hostmem, TYPE_MEMORY_BACKEND,
1237                      HostMemoryBackend *), /* for backward compatibility */
1238     DEFINE_PROP_LINK("persistent-memdev", CXLType3Dev, hostpmem,
1239                      TYPE_MEMORY_BACKEND, HostMemoryBackend *),
1240     DEFINE_PROP_LINK("volatile-memdev", CXLType3Dev, hostvmem,
1241                      TYPE_MEMORY_BACKEND, HostMemoryBackend *),
1242     DEFINE_PROP_LINK("lsa", CXLType3Dev, lsa, TYPE_MEMORY_BACKEND,
1243                      HostMemoryBackend *),
1244     DEFINE_PROP_UINT64("sn", CXLType3Dev, sn, UI64_NULL),
1245     DEFINE_PROP_STRING("cdat", CXLType3Dev, cxl_cstate.cdat.filename),
1246     DEFINE_PROP_UINT8("num-dc-regions", CXLType3Dev, dc.num_regions, 0),
1247     DEFINE_PROP_LINK("volatile-dc-memdev", CXLType3Dev, dc.host_dc,
1248                      TYPE_MEMORY_BACKEND, HostMemoryBackend *),
1249     DEFINE_PROP_PCIE_LINK_SPEED("x-speed", CXLType3Dev,
1250                                 speed, PCIE_LINK_SPEED_32),
1251     DEFINE_PROP_PCIE_LINK_WIDTH("x-width", CXLType3Dev,
1252                                 width, PCIE_LINK_WIDTH_16),
1253 };
1254 
1255 static uint64_t get_lsa_size(CXLType3Dev *ct3d)
1256 {
1257     MemoryRegion *mr;
1258 
1259     if (!ct3d->lsa) {
1260         return 0;
1261     }
1262 
1263     mr = host_memory_backend_get_memory(ct3d->lsa);
1264     return memory_region_size(mr);
1265 }
1266 
1267 static void validate_lsa_access(MemoryRegion *mr, uint64_t size,
1268                                 uint64_t offset)
1269 {
1270     assert(offset + size <= memory_region_size(mr));
1271     assert(offset + size > offset);
1272 }
1273 
1274 static uint64_t get_lsa(CXLType3Dev *ct3d, void *buf, uint64_t size,
1275                     uint64_t offset)
1276 {
1277     MemoryRegion *mr;
1278     void *lsa;
1279 
1280     if (!ct3d->lsa) {
1281         return 0;
1282     }
1283 
1284     mr = host_memory_backend_get_memory(ct3d->lsa);
1285     validate_lsa_access(mr, size, offset);
1286 
1287     lsa = memory_region_get_ram_ptr(mr) + offset;
1288     memcpy(buf, lsa, size);
1289 
1290     return size;
1291 }
1292 
1293 static void set_lsa(CXLType3Dev *ct3d, const void *buf, uint64_t size,
1294                     uint64_t offset)
1295 {
1296     MemoryRegion *mr;
1297     void *lsa;
1298 
1299     if (!ct3d->lsa) {
1300         return;
1301     }
1302 
1303     mr = host_memory_backend_get_memory(ct3d->lsa);
1304     validate_lsa_access(mr, size, offset);
1305 
1306     lsa = memory_region_get_ram_ptr(mr) + offset;
1307     memcpy(lsa, buf, size);
1308     memory_region_set_dirty(mr, offset, size);
1309 
1310     /*
1311      * Just like with PMEM, if the guest does not shut down gracefully, label
1312      * updates will be lost.
1313      */
1314 }
1315 
1316 static bool set_cacheline(CXLType3Dev *ct3d, uint64_t dpa_offset, uint8_t *data)
1317 {
1318     MemoryRegion *vmr = NULL, *pmr = NULL, *dc_mr = NULL;
1319     AddressSpace *as;
1320     uint64_t vmr_size = 0, pmr_size = 0, dc_size = 0;
1321 
1322     if (ct3d->hostvmem) {
1323         vmr = host_memory_backend_get_memory(ct3d->hostvmem);
1324         vmr_size = memory_region_size(vmr);
1325     }
1326     if (ct3d->hostpmem) {
1327         pmr = host_memory_backend_get_memory(ct3d->hostpmem);
1328         pmr_size = memory_region_size(pmr);
1329     }
1330     if (ct3d->dc.host_dc) {
1331         dc_mr = host_memory_backend_get_memory(ct3d->dc.host_dc);
1332         dc_size = memory_region_size(dc_mr);
1333     }
1334 
1335     if (!vmr && !pmr && !dc_mr) {
1336         return false;
1337     }
1338 
1339     if (dpa_offset + CXL_CACHE_LINE_SIZE > vmr_size + pmr_size + dc_size) {
1340         return false;
1341     }
1342 
1343     if (dpa_offset < vmr_size) {
1344         as = &ct3d->hostvmem_as;
1345     } else if (dpa_offset < vmr_size + pmr_size) {
1346         as = &ct3d->hostpmem_as;
1347         dpa_offset -= vmr_size;
1348     } else {
1349         as = &ct3d->dc.host_dc_as;
1350         dpa_offset -= (vmr_size + pmr_size);
1351     }
1352 
1353     address_space_write(as, dpa_offset, MEMTXATTRS_UNSPECIFIED, data,
1354                         CXL_CACHE_LINE_SIZE);
1355     return true;
1356 }
1357 
1358 void cxl_set_poison_list_overflowed(CXLType3Dev *ct3d)
1359 {
1360         ct3d->poison_list_overflowed = true;
1361         ct3d->poison_list_overflow_ts =
1362             cxl_device_get_timestamp(&ct3d->cxl_dstate);
1363 }
1364 
1365 void cxl_clear_poison_list_overflowed(CXLType3Dev *ct3d)
1366 {
1367     ct3d->poison_list_overflowed = false;
1368     ct3d->poison_list_overflow_ts = 0;
1369 }
1370 
1371 void qmp_cxl_inject_poison(const char *path, uint64_t start, uint64_t length,
1372                            Error **errp)
1373 {
1374     Object *obj = object_resolve_path(path, NULL);
1375     CXLType3Dev *ct3d;
1376     CXLPoison *p;
1377 
1378     if (length % 64) {
1379         error_setg(errp, "Poison injection must be in multiples of 64 bytes");
1380         return;
1381     }
1382     if (start % 64) {
1383         error_setg(errp, "Poison start address must be 64 byte aligned");
1384         return;
1385     }
1386     if (!obj) {
1387         error_setg(errp, "Unable to resolve path");
1388         return;
1389     }
1390     if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) {
1391         error_setg(errp, "Path does not point to a CXL type 3 device");
1392         return;
1393     }
1394 
1395     ct3d = CXL_TYPE3(obj);
1396 
1397     QLIST_FOREACH(p, &ct3d->poison_list, node) {
1398         if ((start < p->start + p->length) && (start + length > p->start)) {
1399             error_setg(errp,
1400                        "Overlap with existing poisoned region not supported");
1401             return;
1402         }
1403     }
1404 
1405     p = g_new0(CXLPoison, 1);
1406     p->length = length;
1407     p->start = start;
1408     /* Different type from poison injected via the mailbox */
1409     p->type = CXL_POISON_TYPE_INTERNAL;
1410 
1411     if (ct3d->poison_list_cnt < CXL_POISON_LIST_LIMIT) {
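    /*
     * Up to CXL_POISON_LIST_LIMIT entries are kept on the main poison list;
     * once that limit is reached, record the overflow and queue further
     * injections on the backup list rather than dropping them.
     */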
1412         QLIST_INSERT_HEAD(&ct3d->poison_list, p, node);
1413         ct3d->poison_list_cnt++;
1414     } else {
1415         if (!ct3d->poison_list_overflowed) {
1416             cxl_set_poison_list_overflowed(ct3d);
1417         }
1418         QLIST_INSERT_HEAD(&ct3d->poison_list_bkp, p, node);
1419     }
1420 }
1421 
1422 /* For uncorrectable errors include support for multiple header recording */
1423 void qmp_cxl_inject_uncorrectable_errors(const char *path,
1424                                          CXLUncorErrorRecordList *errors,
1425                                          Error **errp)
1426 {
1427     Object *obj = object_resolve_path(path, NULL);
1428     static PCIEAERErr err = {};
1429     CXLType3Dev *ct3d;
1430     CXLError *cxl_err;
1431     uint32_t *reg_state;
1432     uint32_t unc_err;
1433     bool first;
1434 
1435     if (!obj) {
1436         error_setg(errp, "Unable to resolve path");
1437         return;
1438     }
1439 
1440     if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) {
1441         error_setg(errp, "Path does not point to a CXL type 3 device");
1442         return;
1443     }
1444 
1445     err.status = PCI_ERR_UNC_INTN;
1446     err.source_id = pci_requester_id(PCI_DEVICE(obj));
1447     err.flags = 0;
1448 
1449     ct3d = CXL_TYPE3(obj);
1450 
1451     first = QTAILQ_EMPTY(&ct3d->error_list);
1452     reg_state = ct3d->cxl_cstate.crb.cache_mem_registers;
1453     while (errors) {
1454         uint32List *header = errors->value->header;
1455         uint8_t header_count = 0;
1456         int cxl_err_code;
1457 
1458         cxl_err_code = ct3d_qmp_uncor_err_to_cxl(errors->value->type);
1459         if (cxl_err_code < 0) {
1460             error_setg(errp, "Unknown error code");
1461             return;
1462         }
1463 
1464         /* If the error is masked, nothing to do here */
1465         if (!((1 << cxl_err_code) &
1466               ~ldl_le_p(reg_state + R_CXL_RAS_UNC_ERR_MASK))) {
1467             errors = errors->next;
1468             continue;
1469         }
1470 
1471         cxl_err = g_malloc0(sizeof(*cxl_err));
1472 
1473         cxl_err->type = cxl_err_code;
1474         while (header && header_count < 32) {
1475             cxl_err->header[header_count++] = header->value;
1476             header = header->next;
1477         }
1478         if (header) {
1479             error_setg(errp, "Header must be 32 DWORD or less");
                g_free(cxl_err);
1480             return;
1481         }
1482         QTAILQ_INSERT_TAIL(&ct3d->error_list, cxl_err, node);
1483 
1484         errors = errors->next;
1485     }
1486 
1487     if (first && !QTAILQ_EMPTY(&ct3d->error_list)) {
1488         uint32_t *cache_mem = ct3d->cxl_cstate.crb.cache_mem_registers;
1489         uint32_t capctrl = ldl_le_p(cache_mem + R_CXL_RAS_ERR_CAP_CTRL);
1490         uint32_t *header_log = &cache_mem[R_CXL_RAS_ERR_HEADER0];
1491         int i;
1492 
1493         cxl_err = QTAILQ_FIRST(&ct3d->error_list);
1494         for (i = 0; i < CXL_RAS_ERR_HEADER_NUM; i++) {
1495             stl_le_p(header_log + i, cxl_err->header[i]);
1496         }
1497 
1498         capctrl = FIELD_DP32(capctrl, CXL_RAS_ERR_CAP_CTRL,
1499                              FIRST_ERROR_POINTER, cxl_err->type);
1500         stl_le_p(cache_mem + R_CXL_RAS_ERR_CAP_CTRL, capctrl);
1501     }
1502 
1503     unc_err = 0;
1504     QTAILQ_FOREACH(cxl_err, &ct3d->error_list, node) {
1505         unc_err |= (1 << cxl_err->type);
1506     }
1507     if (!unc_err) {
1508         return;
1509     }
1510 
1511     stl_le_p(reg_state + R_CXL_RAS_UNC_ERR_STATUS, unc_err);
1512     pcie_aer_inject_error(PCI_DEVICE(obj), &err);
1513 }
1514 
1515 void qmp_cxl_inject_correctable_error(const char *path, CxlCorErrorType type,
1516                                       Error **errp)
1517 {
1518     static PCIEAERErr err = {};
1519     Object *obj = object_resolve_path(path, NULL);
1520     CXLType3Dev *ct3d;
1521     uint32_t *reg_state;
1522     uint32_t cor_err;
1523     int cxl_err_type;
1524 
1525     if (!obj) {
1526         error_setg(errp, "Unable to resolve path");
1527         return;
1528     }
1529     if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) {
1530         error_setg(errp, "Path does not point to a CXL type 3 device");
1531         return;
1532     }
1533 
1534     err.status = PCI_ERR_COR_INTERNAL;
1535     err.source_id = pci_requester_id(PCI_DEVICE(obj));
1536     err.flags = PCIE_AER_ERR_IS_CORRECTABLE;
1537 
1538     ct3d = CXL_TYPE3(obj);
1539     reg_state = ct3d->cxl_cstate.crb.cache_mem_registers;
1540     cor_err = ldl_le_p(reg_state + R_CXL_RAS_COR_ERR_STATUS);
1541 
1542     cxl_err_type = ct3d_qmp_cor_err_to_cxl(type);
1543     if (cxl_err_type < 0) {
1544         error_setg(errp, "Invalid COR error");
1545         return;
1546     }
1547     /* If the error is masked, nothing to do here */
1548     if (!((1 << cxl_err_type) &
1549           ~ldl_le_p(reg_state + R_CXL_RAS_COR_ERR_MASK))) {
1550         return;
1551     }
1552 
1553     cor_err |= (1 << cxl_err_type);
1554     stl_le_p(reg_state + R_CXL_RAS_COR_ERR_STATUS, cor_err);
1555 
1556     pcie_aer_inject_error(PCI_DEVICE(obj), &err);
1557 }
1558 
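/*
 * Illustrative QMP invocation for qmp_cxl_inject_correctable_error() above.
 * Command and member names follow the QAPI naming convention for this
 * handler's parameters; the path and error type shown are placeholders:
 *
 * { "execute": "cxl-inject-correctable-error",
 *   "arguments": { "path": "/machine/peripheral/cxl-mem0",
 *                  "type": "mem-data-ecc" } }
 */
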
1559 static void cxl_assign_event_header(CXLEventRecordHdr *hdr,
1560                                     const QemuUUID *uuid, uint32_t flags,
1561                                     uint8_t length, uint64_t timestamp)
1562 {
1563     st24_le_p(&hdr->flags, flags);
1564     hdr->length = length;
1565     memcpy(&hdr->id, uuid, sizeof(hdr->id));
1566     stq_le_p(&hdr->timestamp, timestamp);
1567 }
1568 
1569 static const QemuUUID gen_media_uuid = {
1570     .data = UUID(0xfbcd0a77, 0xc260, 0x417f,
1571                  0x85, 0xa9, 0x08, 0x8b, 0x16, 0x21, 0xeb, 0xa6),
1572 };
1573 
1574 static const QemuUUID dram_uuid = {
1575     .data = UUID(0x601dcbb3, 0x9c06, 0x4eab, 0xb8, 0xaf,
1576                  0x4e, 0x9b, 0xfb, 0x5c, 0x96, 0x24),
1577 };
1578 
1579 static const QemuUUID memory_module_uuid = {
1580     .data = UUID(0xfe927475, 0xdd59, 0x4339, 0xa5, 0x86,
1581                  0x79, 0xba, 0xb1, 0x13, 0xb7, 0x74),
1582 };
1583 
1584 #define CXL_GMER_VALID_CHANNEL                          BIT(0)
1585 #define CXL_GMER_VALID_RANK                             BIT(1)
1586 #define CXL_GMER_VALID_DEVICE                           BIT(2)
1587 #define CXL_GMER_VALID_COMPONENT                        BIT(3)
1588 
1589 static int ct3d_qmp_cxl_event_log_enc(CxlEventLog log)
1590 {
1591     switch (log) {
1592     case CXL_EVENT_LOG_INFORMATIONAL:
1593         return CXL_EVENT_TYPE_INFO;
1594     case CXL_EVENT_LOG_WARNING:
1595         return CXL_EVENT_TYPE_WARN;
1596     case CXL_EVENT_LOG_FAILURE:
1597         return CXL_EVENT_TYPE_FAIL;
1598     case CXL_EVENT_LOG_FATAL:
1599         return CXL_EVENT_TYPE_FATAL;
1600     default:
1601         return -EINVAL;
1602     }
1603 }
1604 /* Component ID is device specific.  Define this as a string. */
1605 void qmp_cxl_inject_general_media_event(const char *path, CxlEventLog log,
1606                                         uint8_t flags, uint64_t dpa,
1607                                         uint8_t descriptor, uint8_t type,
1608                                         uint8_t transaction_type,
1609                                         bool has_channel, uint8_t channel,
1610                                         bool has_rank, uint8_t rank,
1611                                         bool has_device, uint32_t device,
1612                                         const char *component_id,
1613                                         Error **errp)
1614 {
1615     Object *obj = object_resolve_path(path, NULL);
1616     CXLEventGenMedia gem;
1617     CXLEventRecordHdr *hdr = &gem.hdr;
1618     CXLDeviceState *cxlds;
1619     CXLType3Dev *ct3d;
1620     uint16_t valid_flags = 0;
1621     uint8_t enc_log;
1622     int rc;
1623 
1624     if (!obj) {
1625         error_setg(errp, "Unable to resolve path");
1626         return;
1627     }
1628     if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) {
1629         error_setg(errp, "Path does not point to a CXL type 3 device");
1630         return;
1631     }
1632     ct3d = CXL_TYPE3(obj);
1633     cxlds = &ct3d->cxl_dstate;
1634 
1635     rc = ct3d_qmp_cxl_event_log_enc(log);
1636     if (rc < 0) {
1637         error_setg(errp, "Unhandled error log type");
1638         return;
1639     }
1640     enc_log = rc;
1641 
1642     memset(&gem, 0, sizeof(gem));
1643     cxl_assign_event_header(hdr, &gen_media_uuid, flags, sizeof(gem),
1644                             cxl_device_get_timestamp(&ct3d->cxl_dstate));
1645 
1646     stq_le_p(&gem.phys_addr, dpa);
1647     gem.descriptor = descriptor;
1648     gem.type = type;
1649     gem.transaction_type = transaction_type;
1650 
1651     if (has_channel) {
1652         gem.channel = channel;
1653         valid_flags |= CXL_GMER_VALID_CHANNEL;
1654     }
1655 
1656     if (has_rank) {
1657         gem.rank = rank;
1658         valid_flags |= CXL_GMER_VALID_RANK;
1659     }
1660 
1661     if (has_device) {
1662         st24_le_p(gem.device, device);
1663         valid_flags |= CXL_GMER_VALID_DEVICE;
1664     }
1665 
1666     if (component_id) {
1667         strncpy((char *)gem.component_id, component_id,
1668                 sizeof(gem.component_id) - 1);
1669         valid_flags |= CXL_GMER_VALID_COMPONENT;
1670     }
1671 
1672     stw_le_p(&gem.validity_flags, valid_flags);
1673 
1674     if (cxl_event_insert(cxlds, enc_log, (CXLEventRecordRaw *)&gem)) {
1675         cxl_event_irq_assert(ct3d);
1676     }
1677 }
1678 
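/*
 * Illustrative QMP invocation for qmp_cxl_inject_general_media_event()
 * above.  Member names follow the QAPI naming convention for this handler's
 * parameters; the path and numeric values are placeholders:
 *
 * { "execute": "cxl-inject-general-media-event",
 *   "arguments": {
 *     "path": "/machine/peripheral/cxl-mem0",
 *     "log": "informational", "flags": 1, "dpa": 4096,
 *     "descriptor": 3, "type": 2, "transaction-type": 192,
 *     "channel": 1, "rank": 0
 *   } }
 */
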
1679 #define CXL_DRAM_VALID_CHANNEL                          BIT(0)
1680 #define CXL_DRAM_VALID_RANK                             BIT(1)
1681 #define CXL_DRAM_VALID_NIBBLE_MASK                      BIT(2)
1682 #define CXL_DRAM_VALID_BANK_GROUP                       BIT(3)
1683 #define CXL_DRAM_VALID_BANK                             BIT(4)
1684 #define CXL_DRAM_VALID_ROW                              BIT(5)
1685 #define CXL_DRAM_VALID_COLUMN                           BIT(6)
1686 #define CXL_DRAM_VALID_CORRECTION_MASK                  BIT(7)
1687 
1688 void qmp_cxl_inject_dram_event(const char *path, CxlEventLog log, uint8_t flags,
1689                                uint64_t dpa, uint8_t descriptor,
1690                                uint8_t type, uint8_t transaction_type,
1691                                bool has_channel, uint8_t channel,
1692                                bool has_rank, uint8_t rank,
1693                                bool has_nibble_mask, uint32_t nibble_mask,
1694                                bool has_bank_group, uint8_t bank_group,
1695                                bool has_bank, uint8_t bank,
1696                                bool has_row, uint32_t row,
1697                                bool has_column, uint16_t column,
1698                                bool has_correction_mask,
1699                                uint64List *correction_mask,
1700                                Error **errp)
1701 {
1702     Object *obj = object_resolve_path(path, NULL);
1703     CXLEventDram dram;
1704     CXLEventRecordHdr *hdr = &dram.hdr;
1705     CXLDeviceState *cxlds;
1706     CXLType3Dev *ct3d;
1707     uint16_t valid_flags = 0;
1708     uint8_t enc_log;
1709     int rc;
1710 
1711     if (!obj) {
1712         error_setg(errp, "Unable to resolve path");
1713         return;
1714     }
1715     if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) {
1716         error_setg(errp, "Path does not point to a CXL type 3 device");
1717         return;
1718     }
1719     ct3d = CXL_TYPE3(obj);
1720     cxlds = &ct3d->cxl_dstate;
1721 
1722     rc = ct3d_qmp_cxl_event_log_enc(log);
1723     if (rc < 0) {
1724         error_setg(errp, "Unhandled error log type");
1725         return;
1726     }
1727     enc_log = rc;
1728 
1729     memset(&dram, 0, sizeof(dram));
1730     cxl_assign_event_header(hdr, &dram_uuid, flags, sizeof(dram),
1731                             cxl_device_get_timestamp(&ct3d->cxl_dstate));
1732     stq_le_p(&dram.phys_addr, dpa);
1733     dram.descriptor = descriptor;
1734     dram.type = type;
1735     dram.transaction_type = transaction_type;
1736 
1737     if (has_channel) {
1738         dram.channel = channel;
1739         valid_flags |= CXL_DRAM_VALID_CHANNEL;
1740     }
1741 
1742     if (has_rank) {
1743         dram.rank = rank;
1744         valid_flags |= CXL_DRAM_VALID_RANK;
1745     }
1746 
1747     if (has_nibble_mask) {
1748         st24_le_p(dram.nibble_mask, nibble_mask);
1749         valid_flags |= CXL_DRAM_VALID_NIBBLE_MASK;
1750     }
1751 
1752     if (has_bank_group) {
1753         dram.bank_group = bank_group;
1754         valid_flags |= CXL_DRAM_VALID_BANK_GROUP;
1755     }
1756 
1757     if (has_bank) {
1758         dram.bank = bank;
1759         valid_flags |= CXL_DRAM_VALID_BANK;
1760     }
1761 
1762     if (has_row) {
1763         st24_le_p(dram.row, row);
1764         valid_flags |= CXL_DRAM_VALID_ROW;
1765     }
1766 
1767     if (has_column) {
1768         stw_le_p(&dram.column, column);
1769         valid_flags |= CXL_DRAM_VALID_COLUMN;
1770     }
1771 
1772     if (has_correction_mask) {
1773         int count = 0;
1774         while (correction_mask && count < 4) {
1775             stq_le_p(&dram.correction_mask[count],
1776                      correction_mask->value);
1777             count++;
1778             correction_mask = correction_mask->next;
1779         }
1780         valid_flags |= CXL_DRAM_VALID_CORRECTION_MASK;
1781     }
1782 
1783     stw_le_p(&dram.validity_flags, valid_flags);
1784 
1785     if (cxl_event_insert(cxlds, enc_log, (CXLEventRecordRaw *)&dram)) {
1786         cxl_event_irq_assert(ct3d);
1787     }
1788 }
1789 
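/*
 * Illustrative QMP invocation for qmp_cxl_inject_dram_event() above.
 * Member names follow the QAPI naming convention for this handler's
 * parameters; the path and values are placeholders:
 *
 * { "execute": "cxl-inject-dram-event",
 *   "arguments": {
 *     "path": "/machine/peripheral/cxl-mem0",
 *     "log": "informational", "flags": 1, "dpa": 4096,
 *     "descriptor": 3, "type": 2, "transaction-type": 192,
 *     "bank-group": 1, "bank": 2, "row": 3, "column": 4,
 *     "correction-mask": [ 0, 0, 0, 0 ]
 *   } }
 */
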
1790 void qmp_cxl_inject_memory_module_event(const char *path, CxlEventLog log,
1791                                         uint8_t flags, uint8_t type,
1792                                         uint8_t health_status,
1793                                         uint8_t media_status,
1794                                         uint8_t additional_status,
1795                                         uint8_t life_used,
1796                                         int16_t temperature,
1797                                         uint32_t dirty_shutdown_count,
1798                                         uint32_t corrected_volatile_error_count,
1799                                         uint32_t corrected_persist_error_count,
1800                                         Error **errp)
1801 {
1802     Object *obj = object_resolve_path(path, NULL);
1803     CXLEventMemoryModule module;
1804     CXLEventRecordHdr *hdr = &module.hdr;
1805     CXLDeviceState *cxlds;
1806     CXLType3Dev *ct3d;
1807     uint8_t enc_log;
1808     int rc;
1809 
1810     if (!obj) {
1811         error_setg(errp, "Unable to resolve path");
1812         return;
1813     }
1814     if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) {
1815         error_setg(errp, "Path does not point to a CXL type 3 device");
1816         return;
1817     }
1818     ct3d = CXL_TYPE3(obj);
1819     cxlds = &ct3d->cxl_dstate;
1820 
1821     rc = ct3d_qmp_cxl_event_log_enc(log);
1822     if (rc < 0) {
1823         error_setg(errp, "Unhandled error log type");
1824         return;
1825     }
1826     enc_log = rc;
1827 
1828     memset(&module, 0, sizeof(module));
1829     cxl_assign_event_header(hdr, &memory_module_uuid, flags, sizeof(module),
1830                             cxl_device_get_timestamp(&ct3d->cxl_dstate));
1831 
1832     module.type = type;
1833     module.health_status = health_status;
1834     module.media_status = media_status;
1835     module.additional_status = additional_status;
1836     module.life_used = life_used;
1837     stw_le_p(&module.temperature, temperature);
1838     stl_le_p(&module.dirty_shutdown_count, dirty_shutdown_count);
1839     stl_le_p(&module.corrected_volatile_error_count,
1840              corrected_volatile_error_count);
1841     stl_le_p(&module.corrected_persistent_error_count,
1842              corrected_persist_error_count);
1843 
1844     if (cxl_event_insert(cxlds, enc_log, (CXLEventRecordRaw *)&module)) {
1845         cxl_event_irq_assert(ct3d);
1846     }
1847 }
1848 
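/*
 * Illustrative QMP invocation for qmp_cxl_inject_memory_module_event()
 * above.  Member names follow the QAPI naming convention for this handler's
 * parameters; the path and values are placeholders:
 *
 * { "execute": "cxl-inject-memory-module-event",
 *   "arguments": {
 *     "path": "/machine/peripheral/cxl-mem0",
 *     "log": "informational", "flags": 1, "type": 3,
 *     "health-status": 1, "media-status": 0, "additional-status": 0,
 *     "life-used": 20, "temperature": 25, "dirty-shutdown-count": 0,
 *     "corrected-volatile-error-count": 0,
 *     "corrected-persist-error-count": 0
 *   } }
 */
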
1849 /* CXL r3.1 Table 8-50: Dynamic Capacity Event Record */
1850 static const QemuUUID dynamic_capacity_uuid = {
1851     .data = UUID(0xca95afa7, 0xf183, 0x4018, 0x8c, 0x2f,
1852                  0x95, 0x26, 0x8e, 0x10, 0x1a, 0x2a),
1853 };
1854 
1855 typedef enum CXLDCEventType {
1856     DC_EVENT_ADD_CAPACITY = 0x0,
1857     DC_EVENT_RELEASE_CAPACITY = 0x1,
1858     DC_EVENT_FORCED_RELEASE_CAPACITY = 0x2,
1859     DC_EVENT_REGION_CONFIG_UPDATED = 0x3,
1860     DC_EVENT_ADD_CAPACITY_RSP = 0x4,
1861     DC_EVENT_CAPACITY_RELEASED = 0x5,
1862 } CXLDCEventType;
1863 
1864 /*
1865  * Check whether the range [dpa, dpa + len - 1] overlaps any extent in
1866  * the list.
1867  * Returns true if there is an overlap; otherwise, returns false.
1868  */
1869 static bool cxl_extents_overlaps_dpa_range(CXLDCExtentList *list,
1870                                            uint64_t dpa, uint64_t len)
1871 {
1872     CXLDCExtent *ent;
1873     Range range1, range2;
1874 
1875     if (!list) {
1876         return false;
1877     }
1878 
1879     range_init_nofail(&range1, dpa, len);
1880     QTAILQ_FOREACH(ent, list, node) {
1881         range_init_nofail(&range2, ent->start_dpa, ent->len);
1882         if (range_overlaps_range(&range1, &range2)) {
1883             return true;
1884         }
1885     }
1886     return false;
1887 }
1888 
1889 /*
1890  * Check whether the range [dpa, dpa + len - 1] is fully contained by an
1891  * extent in the list. Containment across multiple extents will be checked
1892  * once superset release is added.
1893  * Returns true if the range is contained; otherwise, returns false.
1894  */
1895 bool cxl_extents_contains_dpa_range(CXLDCExtentList *list,
1896                                     uint64_t dpa, uint64_t len)
1897 {
1898     CXLDCExtent *ent;
1899     Range range1, range2;
1900 
1901     if (!list) {
1902         return false;
1903     }
1904 
1905     range_init_nofail(&range1, dpa, len);
1906     QTAILQ_FOREACH(ent, list, node) {
1907         range_init_nofail(&range2, ent->start_dpa, ent->len);
1908         if (range_contains_range(&range2, &range1)) {
1909             return true;
1910         }
1911     }
1912     return false;
1913 }
1914 
1915 static bool cxl_extent_groups_overlaps_dpa_range(CXLDCExtentGroupList *list,
1916                                                  uint64_t dpa, uint64_t len)
1917 {
1918     CXLDCExtentGroup *group;
1919 
1920     if (!list) {
1921         return false;
1922     }
1923 
1924     QTAILQ_FOREACH(group, list, node) {
1925         if (cxl_extents_overlaps_dpa_range(&group->list, dpa, len)) {
1926             return true;
1927         }
1928     }
1929     return false;
1930 }
1931 
1932 /*
1933  * The main function to process dynamic capacity event with extent list.
1934  * Currently DC extents add/release requests are processed.
1935  */
1936 static void qmp_cxl_process_dynamic_capacity_prescriptive(const char *path,
1937         uint16_t hid, CXLDCEventType type, uint8_t rid,
1938         CxlDynamicCapacityExtentList *records, Error **errp)
1939 {
1940     Object *obj;
1941     CXLEventDynamicCapacity dCap = {};
1942     CXLEventRecordHdr *hdr = &dCap.hdr;
1943     CXLType3Dev *dcd;
1944     uint8_t flags = 1 << CXL_EVENT_TYPE_INFO;
1945     uint32_t num_extents = 0;
1946     CxlDynamicCapacityExtentList *list;
1947     CXLDCExtentGroup *group = NULL;
1948     g_autofree CXLDCExtentRaw *extents = NULL;
1949     uint8_t enc_log = CXL_EVENT_TYPE_DYNAMIC_CAP;
1950     uint64_t dpa, offset, len, block_size;
1951     g_autofree unsigned long *blk_bitmap = NULL;
1952     int i;
1953 
1954     obj = object_resolve_path_type(path, TYPE_CXL_TYPE3, NULL);
1955     if (!obj) {
1956         error_setg(errp, "Unable to resolve CXL type 3 device");
1957         return;
1958     }
1959 
1960     dcd = CXL_TYPE3(obj);
1961     if (!dcd->dc.num_regions) {
1962         error_setg(errp, "No dynamic capacity support from the device");
1963         return;
1964     }
1965 
1967     if (rid >= dcd->dc.num_regions) {
1968         error_setg(errp, "region id is too large");
1969         return;
1970     }
1971     block_size = dcd->dc.regions[rid].block_size;
1972     blk_bitmap = bitmap_new(dcd->dc.regions[rid].len / block_size);
1973 
1974     /* Sanity check and count the extents */
1975     list = records;
1976     while (list) {
1977         offset = list->value->offset;
1978         len = list->value->len;
1979         dpa = offset + dcd->dc.regions[rid].base;
1980 
1981         if (len == 0) {
1982             error_setg(errp, "extent with 0 length is not allowed");
1983             return;
1984         }
1985 
1986         if (offset % block_size || len % block_size) {
1987             error_setg(errp, "offset or len is not aligned to region block size");
1988             return;
1989         }
1990 
1991         if (offset + len > dcd->dc.regions[rid].len) {
1992             error_setg(errp, "extent range is beyond the region end");
1993             return;
1994         }
1995 
1996         /* No duplicate or overlapped extents are allowed */
1997         if (test_any_bits_set(blk_bitmap, offset / block_size,
1998                               len / block_size)) {
1999             error_setg(errp, "duplicate or overlapped extents are detected");
2000             return;
2001         }
2002         bitmap_set(blk_bitmap, offset / block_size, len / block_size);
2003 
2004         if (type == DC_EVENT_RELEASE_CAPACITY) {
2005             if (cxl_extent_groups_overlaps_dpa_range(&dcd->dc.extents_pending,
2006                                                      dpa, len)) {
2007                 error_setg(errp,
2008                            "cannot release extent with pending DPA range");
2009                 return;
2010             }
2011             if (!ct3_test_region_block_backed(dcd, dpa, len)) {
2012                 error_setg(errp,
2013                            "cannot release extent with non-existing DPA range");
2014                 return;
2015             }
2016         } else if (type == DC_EVENT_ADD_CAPACITY) {
2017             if (cxl_extents_overlaps_dpa_range(&dcd->dc.extents, dpa, len)) {
2018                 error_setg(errp,
2019                            "cannot add DPA already accessible to the same LD");
2020                 return;
2021             }
2022             if (cxl_extent_groups_overlaps_dpa_range(&dcd->dc.extents_pending,
2023                                                      dpa, len)) {
2024                 error_setg(errp,
2025                            "cannot add DPA again while still pending");
2026                 return;
2027             }
2028         }
2029         list = list->next;
2030         num_extents++;
2031     }
2032 
2033     /* Create extent list for event being passed to host */
2034     i = 0;
2035     list = records;
2036     extents = g_new0(CXLDCExtentRaw, num_extents);
2037     while (list) {
2038         offset = list->value->offset;
2039         len = list->value->len;
2040         dpa = dcd->dc.regions[rid].base + offset;
2041 
2042         extents[i].start_dpa = dpa;
2043         extents[i].len = len;
2044         memset(extents[i].tag, 0, 0x10);
2045         extents[i].shared_seq = 0;
2046         if (type == DC_EVENT_ADD_CAPACITY) {
2047             group = cxl_insert_extent_to_extent_group(group,
2048                                                       extents[i].start_dpa,
2049                                                       extents[i].len,
2050                                                       extents[i].tag,
2051                                                       extents[i].shared_seq);
2052         }
2053 
2054         list = list->next;
2055         i++;
2056     }
2057     if (group) {
2058         cxl_extent_group_list_insert_tail(&dcd->dc.extents_pending, group);
2059     }
2060 
2061     /*
2062      * CXL r3.1 section 8.2.9.2.1.6: Dynamic Capacity Event Record
2063      *
2064      * All Dynamic Capacity event records shall set the Event Record Severity
2065      * field in the Common Event Record Format to Informational Event. All
2066      * Dynamic Capacity related events shall be logged in the Dynamic Capacity
2067      * Event Log.
2068      */
2069     cxl_assign_event_header(hdr, &dynamic_capacity_uuid, flags, sizeof(dCap),
2070                             cxl_device_get_timestamp(&dcd->cxl_dstate));
2071 
2072     dCap.type = type;
2073     /* FIXME: for now, validity flag is cleared */
2074     dCap.validity_flags = 0;
2075     stw_le_p(&dCap.host_id, hid);
2076     /* only valid for DC_REGION_CONFIG_UPDATED event */
2077     /* Only valid for the DC_EVENT_REGION_CONFIG_UPDATED event */
2078     for (i = 0; i < num_extents; i++) {
2079         memcpy(&dCap.dynamic_capacity_extent, &extents[i],
2080                sizeof(CXLDCExtentRaw));
2081 
2082         dCap.flags = 0;
2083         if (i < num_extents - 1) {
2084             /* Set "More" flag */
2085             dCap.flags |= BIT(0);
2086         }
2087 
2088         if (cxl_event_insert(&dcd->cxl_dstate, enc_log,
2089                              (CXLEventRecordRaw *)&dCap)) {
2090             cxl_event_irq_assert(dcd);
2091         }
2092     }
2093 }
2094 
2095 void qmp_cxl_add_dynamic_capacity(const char *path, uint16_t host_id,
2096                                   CxlExtentSelectionPolicy sel_policy,
2097                                   uint8_t region, const char *tag,
2098                                   CxlDynamicCapacityExtentList  *extents,
2099                                   Error **errp)
2100 {
2101     switch (sel_policy) {
2102     case CXL_EXTENT_SELECTION_POLICY_PRESCRIPTIVE:
2103         qmp_cxl_process_dynamic_capacity_prescriptive(path, host_id,
2104                                                       DC_EVENT_ADD_CAPACITY,
2105                                                       region, extents, errp);
2106         return;
2107     default:
2108         error_setg(errp, "Selection policy not supported");
2109         return;
2110     }
2111 }
2112 
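/*
 * Illustrative QMP invocation for qmp_cxl_add_dynamic_capacity() above.
 * Member names follow the QAPI naming convention for this handler's
 * parameters; the path, region and extent values are placeholders.  Extent
 * offset and len must be aligned to the region block size:
 *
 * { "execute": "cxl-add-dynamic-capacity",
 *   "arguments": {
 *     "path": "/machine/peripheral/cxl-dcd0",
 *     "host-id": 0,
 *     "selection-policy": "prescriptive",
 *     "region": 0,
 *     "extents": [ { "offset": 0, "len": 134217728 } ]
 *   } }
 */
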
2113 void qmp_cxl_release_dynamic_capacity(const char *path, uint16_t host_id,
2114                                       CxlExtentRemovalPolicy removal_policy,
2115                                       bool has_forced_removal,
2116                                       bool forced_removal,
2117                                       bool has_sanitize_on_release,
2118                                       bool sanitize_on_release,
2119                                       uint8_t region,
2120                                       const char *tag,
2121                                       CxlDynamicCapacityExtentList  *extents,
2122                                       Error **errp)
2123 {
2124     CXLDCEventType type = DC_EVENT_RELEASE_CAPACITY;
2125 
2126     if (has_forced_removal && forced_removal) {
2127         /* TODO: enable forced removal in the future */
2128         type = DC_EVENT_FORCED_RELEASE_CAPACITY;
2129         error_setg(errp, "Forced removal not supported yet");
2130         return;
2131     }
2132 
2133     switch (removal_policy) {
2134     case CXL_EXTENT_REMOVAL_POLICY_PRESCRIPTIVE:
2135         qmp_cxl_process_dynamic_capacity_prescriptive(path, host_id, type,
2136                                                       region, extents, errp);
2137         return;
2138     default:
2139         error_setg(errp, "Removal policy not supported");
2140         return;
2141     }
2142 }
2143 
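/*
 * Illustrative QMP invocation for qmp_cxl_release_dynamic_capacity() above.
 * Member names follow the QAPI naming convention for this handler's
 * parameters; the path, region and extent values are placeholders.  Only
 * the prescriptive removal policy is handled here, and forced removal is
 * rejected:
 *
 * { "execute": "cxl-release-dynamic-capacity",
 *   "arguments": {
 *     "path": "/machine/peripheral/cxl-dcd0",
 *     "host-id": 0,
 *     "removal-policy": "prescriptive",
 *     "region": 0,
 *     "extents": [ { "offset": 0, "len": 134217728 } ]
 *   } }
 */
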
2144 static void ct3_class_init(ObjectClass *oc, const void *data)
2145 {
2146     DeviceClass *dc = DEVICE_CLASS(oc);
2147     PCIDeviceClass *pc = PCI_DEVICE_CLASS(oc);
2148     CXLType3Class *cvc = CXL_TYPE3_CLASS(oc);
2149 
2150     pc->realize = ct3_realize;
2151     pc->exit = ct3_exit;
2152     pc->class_id = PCI_CLASS_MEMORY_CXL;
2153     pc->vendor_id = PCI_VENDOR_ID_INTEL;
2154     pc->device_id = 0xd93; /* LVF for now */
2155     pc->revision = 1;
2156 
2157     pc->config_write = ct3d_config_write;
2158     pc->config_read = ct3d_config_read;
2159 
2160     set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
2161     dc->desc = "CXL Memory Device (Type 3)";
2162     device_class_set_legacy_reset(dc, ct3d_reset);
2163     device_class_set_props(dc, ct3_props);
2164 
2165     cvc->get_lsa_size = get_lsa_size;
2166     cvc->get_lsa = get_lsa;
2167     cvc->set_lsa = set_lsa;
2168     cvc->set_cacheline = set_cacheline;
2169 }
2170 
2171 static const TypeInfo ct3d_info = {
2172     .name = TYPE_CXL_TYPE3,
2173     .parent = TYPE_PCI_DEVICE,
2174     .class_size = sizeof(struct CXLType3Class),
2175     .class_init = ct3_class_init,
2176     .instance_size = sizeof(CXLType3Dev),
2177     .interfaces = (const InterfaceInfo[]) {
2178         { INTERFACE_CXL_DEVICE },
2179         { INTERFACE_PCIE_DEVICE },
2180         {}
2181     },
2182 };
2183 
2184 static void ct3d_registers(void)
2185 {
2186     type_register_static(&ct3d_info);
2187 }
2188 
2189 type_init(ct3d_registers);
2190