xref: /qemu/hw/mem/cxl_type3.c (revision 6ff5da16000f908140723e164d33a0b51a6c4162)
1 /*
2  * CXL Type 3 (memory expander) device
3  *
4  * Copyright(C) 2020 Intel Corporation.
5  *
6  * This work is licensed under the terms of the GNU GPL, version 2. See the
7  * COPYING file in the top-level directory.
8  *
9  * SPDX-License-Identifier: GPL-2.0-only
10  */
11 
12 #include "qemu/osdep.h"
13 #include "qemu/units.h"
14 #include "qemu/error-report.h"
15 #include "qapi/qapi-commands-cxl.h"
16 #include "hw/mem/memory-device.h"
17 #include "hw/mem/pc-dimm.h"
18 #include "hw/pci/pci.h"
19 #include "hw/qdev-properties.h"
20 #include "hw/qdev-properties-system.h"
21 #include "qapi/error.h"
22 #include "qemu/log.h"
23 #include "qemu/module.h"
24 #include "qemu/pmem.h"
25 #include "qemu/range.h"
26 #include "qemu/rcu.h"
27 #include "qemu/guest-random.h"
28 #include "system/hostmem.h"
29 #include "system/numa.h"
30 #include "hw/cxl/cxl.h"
31 #include "hw/pci/msix.h"
32 
33 /* type3 device private */
34 enum CXL_T3_MSIX_VECTOR {
35     CXL_T3_MSIX_PCIE_DOE_TABLE_ACCESS = 0,
36     CXL_T3_MSIX_EVENT_START = 2,
37     CXL_T3_MSIX_MBOX = CXL_T3_MSIX_EVENT_START + CXL_EVENT_TYPE_MAX,
38     CXL_T3_MSIX_VECTOR_NR
39 };
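/*
 * Rough vector layout implied by the enum above: vector 0 services PCIe DOE
 * table access interrupts, the event logs take one vector each starting at
 * CXL_T3_MSIX_EVENT_START, and the mailbox vector follows the event block.
 */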
40 
41 #define DWORD_BYTE 4
42 #define CXL_CAPACITY_MULTIPLIER   (256 * MiB)
43 
44 /* Default CDAT entries for a memory region */
45 enum {
46     CT3_CDAT_DSMAS,
47     CT3_CDAT_DSLBIS0,
48     CT3_CDAT_DSLBIS1,
49     CT3_CDAT_DSLBIS2,
50     CT3_CDAT_DSLBIS3,
51     CT3_CDAT_DSEMTS,
52     CT3_CDAT_NUM_ENTRIES
53 };
54 
55 static void ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table,
56                                           int dsmad_handle, uint64_t size,
57                                           bool is_pmem, bool is_dynamic,
58                                           uint64_t dpa_base)
59 {
60     CDATDsmas *dsmas;
61     CDATDslbis *dslbis0;
62     CDATDslbis *dslbis1;
63     CDATDslbis *dslbis2;
64     CDATDslbis *dslbis3;
65     CDATDsemts *dsemts;
66 
67     dsmas = g_malloc(sizeof(*dsmas));
68     *dsmas = (CDATDsmas) {
69         .header = {
70             .type = CDAT_TYPE_DSMAS,
71             .length = sizeof(*dsmas),
72         },
73         .DSMADhandle = dsmad_handle,
74         .flags = (is_pmem ? CDAT_DSMAS_FLAG_NV : 0) |
75                  (is_dynamic ? CDAT_DSMAS_FLAG_DYNAMIC_CAP : 0),
76         .DPA_base = dpa_base,
77         .DPA_length = size,
78     };
79 
80     /* For now, no memory side cache; use plausible placeholder numbers */
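    /*
     * Worked example of the DSLBIS encodings below, assuming the HMAT-style
     * base units (picoseconds for latency, MB/s for bandwidth): latency is
     * entry * entry_base_unit, so base 10000 with entry 15 gives 150000 ps
     * = 150 ns; bandwidth is entry * entry_base_unit, so base 1000 with
     * entry 16 gives 16000 MB/s = 16 GB/s.
     */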
81     dslbis0 = g_malloc(sizeof(*dslbis0));
82     *dslbis0 = (CDATDslbis) {
83         .header = {
84             .type = CDAT_TYPE_DSLBIS,
85             .length = sizeof(*dslbis0),
86         },
87         .handle = dsmad_handle,
88         .flags = HMAT_LB_MEM_MEMORY,
89         .data_type = HMAT_LB_DATA_READ_LATENCY,
90         .entry_base_unit = 10000, /* 10ns base */
91         .entry[0] = 15, /* 150ns */
92     };
93 
94     dslbis1 = g_malloc(sizeof(*dslbis1));
95     *dslbis1 = (CDATDslbis) {
96         .header = {
97             .type = CDAT_TYPE_DSLBIS,
98             .length = sizeof(*dslbis1),
99         },
100         .handle = dsmad_handle,
101         .flags = HMAT_LB_MEM_MEMORY,
102         .data_type = HMAT_LB_DATA_WRITE_LATENCY,
103         .entry_base_unit = 10000,
104         .entry[0] = 25, /* 250ns */
105     };
106 
107     dslbis2 = g_malloc(sizeof(*dslbis2));
108     *dslbis2 = (CDATDslbis) {
109         .header = {
110             .type = CDAT_TYPE_DSLBIS,
111             .length = sizeof(*dslbis2),
112         },
113         .handle = dsmad_handle,
114         .flags = HMAT_LB_MEM_MEMORY,
115         .data_type = HMAT_LB_DATA_READ_BANDWIDTH,
116         .entry_base_unit = 1000, /* GB/s */
117         .entry[0] = 16,
118     };
119 
120     dslbis3 = g_malloc(sizeof(*dslbis3));
121     *dslbis3 = (CDATDslbis) {
122         .header = {
123             .type = CDAT_TYPE_DSLBIS,
124             .length = sizeof(*dslbis3),
125         },
126         .handle = dsmad_handle,
127         .flags = HMAT_LB_MEM_MEMORY,
128         .data_type = HMAT_LB_DATA_WRITE_BANDWIDTH,
129         .entry_base_unit = 1000, /* GB/s */
130         .entry[0] = 16,
131     };
132 
133     dsemts = g_malloc(sizeof(*dsemts));
134     *dsemts = (CDATDsemts) {
135         .header = {
136             .type = CDAT_TYPE_DSEMTS,
137             .length = sizeof(*dsemts),
138         },
139         .DSMAS_handle = dsmad_handle,
140         /*
141          * NV: Reserved - the non-volatile flag from DSMAS is what matters
142          * V: EFI_MEMORY_SP
143          */
144         .EFI_memory_type_attr = is_pmem ? 2 : 1,
145         .DPA_offset = 0,
146         .DPA_length = size,
147     };
148 
149     /* Header always at start of structure */
150     cdat_table[CT3_CDAT_DSMAS] = (CDATSubHeader *)dsmas;
151     cdat_table[CT3_CDAT_DSLBIS0] = (CDATSubHeader *)dslbis0;
152     cdat_table[CT3_CDAT_DSLBIS1] = (CDATSubHeader *)dslbis1;
153     cdat_table[CT3_CDAT_DSLBIS2] = (CDATSubHeader *)dslbis2;
154     cdat_table[CT3_CDAT_DSLBIS3] = (CDATSubHeader *)dslbis3;
155     cdat_table[CT3_CDAT_DSEMTS] = (CDATSubHeader *)dsemts;
156 }
157 
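/*
 * Sketch of the resulting CDAT layout: each backing store (volatile, then
 * persistent, then every dynamic capacity region) contributes one group of
 * CT3_CDAT_NUM_ENTRIES structures, with DPA bases handed out in that same
 * order so the table mirrors the device's flat DPA map.
 */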
158 static int ct3_build_cdat_table(CDATSubHeader ***cdat_table, void *priv)
159 {
160     g_autofree CDATSubHeader **table = NULL;
161     CXLType3Dev *ct3d = priv;
162     MemoryRegion *volatile_mr = NULL, *nonvolatile_mr = NULL;
163     MemoryRegion *dc_mr = NULL;
164     uint64_t vmr_size = 0, pmr_size = 0;
165     int dsmad_handle = 0;
166     int cur_ent = 0;
167     int len = 0;
168 
169     if (!ct3d->hostpmem && !ct3d->hostvmem && !ct3d->dc.num_regions) {
170         return 0;
171     }
172 
173     if (ct3d->hostvmem) {
174         volatile_mr = host_memory_backend_get_memory(ct3d->hostvmem);
175         if (!volatile_mr) {
176             return -EINVAL;
177         }
178         len += CT3_CDAT_NUM_ENTRIES;
179         vmr_size = memory_region_size(volatile_mr);
180     }
181 
182     if (ct3d->hostpmem) {
183         nonvolatile_mr = host_memory_backend_get_memory(ct3d->hostpmem);
184         if (!nonvolatile_mr) {
185             return -EINVAL;
186         }
187         len += CT3_CDAT_NUM_ENTRIES;
188         pmr_size = memory_region_size(nonvolatile_mr);
189     }
190 
191     if (ct3d->dc.num_regions) {
192         if (!ct3d->dc.host_dc) {
193             return -EINVAL;
194         }
195         dc_mr = host_memory_backend_get_memory(ct3d->dc.host_dc);
196         if (!dc_mr) {
197             return -EINVAL;
198         }
199         len += CT3_CDAT_NUM_ENTRIES * ct3d->dc.num_regions;
200     }
201 
202     table = g_malloc0(len * sizeof(*table));
203 
204     /* Now fill them in */
205     if (volatile_mr) {
206         ct3_build_cdat_entries_for_mr(table, dsmad_handle++, vmr_size,
207                                       false, false, 0);
208         cur_ent = CT3_CDAT_NUM_ENTRIES;
209     }
210 
211     if (nonvolatile_mr) {
212         uint64_t base = vmr_size;
213         ct3_build_cdat_entries_for_mr(&(table[cur_ent]), dsmad_handle++,
214                                       pmr_size, true, false, base);
215         cur_ent += CT3_CDAT_NUM_ENTRIES;
216     }
217 
218     if (dc_mr) {
219         int i;
220         uint64_t region_base = vmr_size + pmr_size;
221 
222         /*
223          * We assume the dynamic capacity to be volatile for now.
224          * Non-volatile dynamic capacity will be added if needed in the
225          * future.
226          */
227         for (i = 0; i < ct3d->dc.num_regions; i++) {
228             ct3_build_cdat_entries_for_mr(&(table[cur_ent]),
229                                           dsmad_handle++,
230                                           ct3d->dc.regions[i].len,
231                                           false, true, region_base);
232             ct3d->dc.regions[i].dsmadhandle = dsmad_handle - 1;
233 
234             cur_ent += CT3_CDAT_NUM_ENTRIES;
235             region_base += ct3d->dc.regions[i].len;
236         }
237     }
238 
239     assert(len == cur_ent);
240 
241     *cdat_table = g_steal_pointer(&table);
242 
243     return len;
244 }
245 
246 static void ct3_free_cdat_table(CDATSubHeader **cdat_table, int num, void *priv)
247 {
248     int i;
249 
250     for (i = 0; i < num; i++) {
251         g_free(cdat_table[i]);
252     }
253     g_free(cdat_table);
254 }
255 
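/*
 * DOE CDAT table access: each request names an entry handle and the response
 * carries that entry plus the handle of the next one, with
 * CXL_DOE_TAB_ENT_MAX signalling that the final entry has been returned, so
 * the host walks the table one mailbox exchange at a time.
 */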
256 static bool cxl_doe_cdat_rsp(DOECap *doe_cap)
257 {
258     CDATObject *cdat = &CXL_TYPE3(doe_cap->pdev)->cxl_cstate.cdat;
259     uint16_t ent;
260     void *base;
261     uint32_t len;
262     CDATReq *req = pcie_doe_get_write_mbox_ptr(doe_cap);
263     CDATRsp rsp;
264 
265     assert(cdat->entry_len);
266 
267     /* Discard if the request is shorter than expected */
268     if (pcie_doe_get_obj_len(req) <
269         DIV_ROUND_UP(sizeof(CDATReq), DWORD_BYTE)) {
270         return false;
271     }
272 
273     ent = req->entry_handle;
274     base = cdat->entry[ent].base;
275     len = cdat->entry[ent].length;
276 
277     rsp = (CDATRsp) {
278         .header = {
279             .vendor_id = CXL_VENDOR_ID,
280             .data_obj_type = CXL_DOE_TABLE_ACCESS,
281             .reserved = 0x0,
282             .length = DIV_ROUND_UP((sizeof(rsp) + len), DWORD_BYTE),
283         },
284         .rsp_code = CXL_DOE_TAB_RSP,
285         .table_type = CXL_DOE_TAB_TYPE_CDAT,
286         .entry_handle = (ent < cdat->entry_len - 1) ?
287                         ent + 1 : CXL_DOE_TAB_ENT_MAX,
288     };
289 
290     memcpy(doe_cap->read_mbox, &rsp, sizeof(rsp));
291     memcpy(doe_cap->read_mbox + DIV_ROUND_UP(sizeof(rsp), DWORD_BYTE),
292            base, len);
293 
294     doe_cap->read_mbox_len += rsp.header.length;
295 
296     return true;
297 }
298 
299 static uint32_t ct3d_config_read(PCIDevice *pci_dev, uint32_t addr, int size)
300 {
301     CXLType3Dev *ct3d = CXL_TYPE3(pci_dev);
302     uint32_t val;
303 
304     if (pcie_doe_read_config(&ct3d->doe_cdat, addr, size, &val)) {
305         return val;
306     }
307 
308     return pci_default_read_config(pci_dev, addr, size);
309 }
310 
311 static void ct3d_config_write(PCIDevice *pci_dev, uint32_t addr, uint32_t val,
312                               int size)
313 {
314     CXLType3Dev *ct3d = CXL_TYPE3(pci_dev);
315 
316     pcie_doe_write_config(&ct3d->doe_cdat, addr, val, size);
317     pci_default_write_config(pci_dev, addr, val, size);
318     pcie_aer_write_config(pci_dev, addr, val, size);
319 }
320 
321 /*
322  * Null value of all Fs suggested by IEEE RA guidelines for use of
323  * EUI, OUI and CID
324  */
325 #define UI64_NULL ~(0ULL)
326 
327 static void build_dvsecs(CXLType3Dev *ct3d)
328 {
329     CXLComponentState *cxl_cstate = &ct3d->cxl_cstate;
330     uint8_t *dvsec;
331     uint32_t range1_size_hi, range1_size_lo,
332              range1_base_hi = 0, range1_base_lo = 0,
333              range2_size_hi = 0, range2_size_lo = 0,
334              range2_base_hi = 0, range2_base_lo = 0;
335 
336     /*
337      * Volatile memory is mapped at DPA 0x0.
338      * Persistent memory is mapped at DPA (volatile->size).
339      */
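    /*
     * Reading of the range registers used below (not a normative statement
     * of the spec): each *_size_lo word packs attribute flags in its low
     * bits - memory info valid and memory active in bits 1:0, with media
     * type and memory class fields above them - together with bits [31:28]
     * of the range size, which is why the size is masked with 0xF0000000.
     */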
340     if (ct3d->hostvmem) {
341         range1_size_hi = ct3d->hostvmem->size >> 32;
342         range1_size_lo = (2 << 5) | (2 << 2) | 0x3 |
343                          (ct3d->hostvmem->size & 0xF0000000);
344         if (ct3d->hostpmem) {
345             range2_size_hi = ct3d->hostpmem->size >> 32;
346             range2_size_lo = (2 << 5) | (2 << 2) | 0x3 |
347                              (ct3d->hostpmem->size & 0xF0000000);
348         }
349     } else if (ct3d->hostpmem) {
350         range1_size_hi = ct3d->hostpmem->size >> 32;
351         range1_size_lo = (2 << 5) | (2 << 2) | 0x3 |
352                          (ct3d->hostpmem->size & 0xF0000000);
353     } else {
354         /*
355          * For a DCD with no static memory, set the memory active and
356          * memory class bits. No range is set.
357          */
358         range1_size_hi = 0;
359         range1_size_lo = (2 << 5) | (2 << 2) | 0x3;
360     }
361 
362     dvsec = (uint8_t *)&(CXLDVSECDevice){
363         .cap = 0x1e,
364         .ctrl = 0x2,
365         .status2 = 0x2,
366         .range1_size_hi = range1_size_hi,
367         .range1_size_lo = range1_size_lo,
368         .range1_base_hi = range1_base_hi,
369         .range1_base_lo = range1_base_lo,
370         .range2_size_hi = range2_size_hi,
371         .range2_size_lo = range2_size_lo,
372         .range2_base_hi = range2_base_hi,
373         .range2_base_lo = range2_base_lo,
374     };
375     cxl_component_create_dvsec(cxl_cstate, CXL2_TYPE3_DEVICE,
376                                PCIE_CXL_DEVICE_DVSEC_LENGTH,
377                                PCIE_CXL_DEVICE_DVSEC,
378                                PCIE_CXL31_DEVICE_DVSEC_REVID, dvsec);
379 
380     dvsec = (uint8_t *)&(CXLDVSECRegisterLocator){
381         .rsvd         = 0,
382         .reg0_base_lo = RBI_COMPONENT_REG | CXL_COMPONENT_REG_BAR_IDX,
383         .reg0_base_hi = 0,
384         .reg1_base_lo = RBI_CXL_DEVICE_REG | CXL_DEVICE_REG_BAR_IDX,
385         .reg1_base_hi = 0,
386     };
387     cxl_component_create_dvsec(cxl_cstate, CXL2_TYPE3_DEVICE,
388                                REG_LOC_DVSEC_LENGTH, REG_LOC_DVSEC,
389                                REG_LOC_DVSEC_REVID, dvsec);
390     dvsec = (uint8_t *)&(CXLDVSECDeviceGPF){
391         .phase2_duration = 0x603, /* 3 seconds */
392         .phase2_power = 0x33, /* 0x33 milliwatts */
393     };
394     cxl_component_create_dvsec(cxl_cstate, CXL2_TYPE3_DEVICE,
395                                GPF_DEVICE_DVSEC_LENGTH, GPF_DEVICE_DVSEC,
396                                GPF_DEVICE_DVSEC_REVID, dvsec);
397 
398     dvsec = (uint8_t *)&(CXLDVSECPortFlexBus){
399         .cap                     = 0x26, /* 68B, IO, Mem, non-MLD */
400         .ctrl                    = 0x02, /* IO always enabled */
401         .status                  = 0x26, /* same as capabilities */
402         .rcvd_mod_ts_data_phase1 = 0xef, /* Value unclear; needs verification */
403     };
404     cxl_component_create_dvsec(cxl_cstate, CXL2_TYPE3_DEVICE,
405                                PCIE_CXL3_FLEXBUS_PORT_DVSEC_LENGTH,
406                                PCIE_FLEXBUS_PORT_DVSEC,
407                                PCIE_CXL3_FLEXBUS_PORT_DVSEC_REVID, dvsec);
408 }
409 
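/*
 * HDM decoder commit handshake as modelled here: the guest sets COMMIT in a
 * decoder's CTRL register (see ct3d_reg_write below) and the device responds
 * by setting COMMITTED and clearing ERR; writing with COMMIT clear takes the
 * uncommit path instead.  No validation of the programmed decode is done yet.
 */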
410 static void hdm_decoder_commit(CXLType3Dev *ct3d, int which)
411 {
412     int hdm_inc = R_CXL_HDM_DECODER1_BASE_LO - R_CXL_HDM_DECODER0_BASE_LO;
413     ComponentRegisters *cregs = &ct3d->cxl_cstate.crb;
414     uint32_t *cache_mem = cregs->cache_mem_registers;
415     uint32_t ctrl;
416 
417     ctrl = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_CTRL + which * hdm_inc);
418     /* TODO: Sanity check that the decoder configuration is possible */
419     ctrl = FIELD_DP32(ctrl, CXL_HDM_DECODER0_CTRL, ERR, 0);
420     ctrl = FIELD_DP32(ctrl, CXL_HDM_DECODER0_CTRL, COMMITTED, 1);
421 
422     stl_le_p(cache_mem + R_CXL_HDM_DECODER0_CTRL + which * hdm_inc, ctrl);
423 }
424 
425 static void hdm_decoder_uncommit(CXLType3Dev *ct3d, int which)
426 {
427     int hdm_inc = R_CXL_HDM_DECODER1_BASE_LO - R_CXL_HDM_DECODER0_BASE_LO;
428     ComponentRegisters *cregs = &ct3d->cxl_cstate.crb;
429     uint32_t *cache_mem = cregs->cache_mem_registers;
430     uint32_t ctrl;
431 
432     ctrl = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_CTRL + which * hdm_inc);
433 
434     ctrl = FIELD_DP32(ctrl, CXL_HDM_DECODER0_CTRL, ERR, 0);
435     ctrl = FIELD_DP32(ctrl, CXL_HDM_DECODER0_CTRL, COMMITTED, 0);
436 
437     stl_le_p(cache_mem + R_CXL_HDM_DECODER0_CTRL + which * hdm_inc, ctrl);
438 }
439 
440 static int ct3d_qmp_uncor_err_to_cxl(CxlUncorErrorType qmp_err)
441 {
442     switch (qmp_err) {
443     case CXL_UNCOR_ERROR_TYPE_CACHE_DATA_PARITY:
444         return CXL_RAS_UNC_ERR_CACHE_DATA_PARITY;
445     case CXL_UNCOR_ERROR_TYPE_CACHE_ADDRESS_PARITY:
446         return CXL_RAS_UNC_ERR_CACHE_ADDRESS_PARITY;
447     case CXL_UNCOR_ERROR_TYPE_CACHE_BE_PARITY:
448         return CXL_RAS_UNC_ERR_CACHE_BE_PARITY;
449     case CXL_UNCOR_ERROR_TYPE_CACHE_DATA_ECC:
450         return CXL_RAS_UNC_ERR_CACHE_DATA_ECC;
451     case CXL_UNCOR_ERROR_TYPE_MEM_DATA_PARITY:
452         return CXL_RAS_UNC_ERR_MEM_DATA_PARITY;
453     case CXL_UNCOR_ERROR_TYPE_MEM_ADDRESS_PARITY:
454         return CXL_RAS_UNC_ERR_MEM_ADDRESS_PARITY;
455     case CXL_UNCOR_ERROR_TYPE_MEM_BE_PARITY:
456         return CXL_RAS_UNC_ERR_MEM_BE_PARITY;
457     case CXL_UNCOR_ERROR_TYPE_MEM_DATA_ECC:
458         return CXL_RAS_UNC_ERR_MEM_DATA_ECC;
459     case CXL_UNCOR_ERROR_TYPE_REINIT_THRESHOLD:
460         return CXL_RAS_UNC_ERR_REINIT_THRESHOLD;
461     case CXL_UNCOR_ERROR_TYPE_RSVD_ENCODING:
462         return CXL_RAS_UNC_ERR_RSVD_ENCODING;
463     case CXL_UNCOR_ERROR_TYPE_POISON_RECEIVED:
464         return CXL_RAS_UNC_ERR_POISON_RECEIVED;
465     case CXL_UNCOR_ERROR_TYPE_RECEIVER_OVERFLOW:
466         return CXL_RAS_UNC_ERR_RECEIVER_OVERFLOW;
467     case CXL_UNCOR_ERROR_TYPE_INTERNAL:
468         return CXL_RAS_UNC_ERR_INTERNAL;
469     case CXL_UNCOR_ERROR_TYPE_CXL_IDE_TX:
470         return CXL_RAS_UNC_ERR_CXL_IDE_TX;
471     case CXL_UNCOR_ERROR_TYPE_CXL_IDE_RX:
472         return CXL_RAS_UNC_ERR_CXL_IDE_RX;
473     default:
474         return -EINVAL;
475     }
476 }
477 
478 static int ct3d_qmp_cor_err_to_cxl(CxlCorErrorType qmp_err)
479 {
480     switch (qmp_err) {
481     case CXL_COR_ERROR_TYPE_CACHE_DATA_ECC:
482         return CXL_RAS_COR_ERR_CACHE_DATA_ECC;
483     case CXL_COR_ERROR_TYPE_MEM_DATA_ECC:
484         return CXL_RAS_COR_ERR_MEM_DATA_ECC;
485     case CXL_COR_ERROR_TYPE_CRC_THRESHOLD:
486         return CXL_RAS_COR_ERR_CRC_THRESHOLD;
487     case CXL_COR_ERROR_TYPE_RETRY_THRESHOLD:
488         return CXL_RAS_COR_ERR_RETRY_THRESHOLD;
489     case CXL_COR_ERROR_TYPE_CACHE_POISON_RECEIVED:
490         return CXL_RAS_COR_ERR_CACHE_POISON_RECEIVED;
491     case CXL_COR_ERROR_TYPE_MEM_POISON_RECEIVED:
492         return CXL_RAS_COR_ERR_MEM_POISON_RECEIVED;
493     case CXL_COR_ERROR_TYPE_PHYSICAL:
494         return CXL_RAS_COR_ERR_PHYSICAL;
495     default:
496         return -EINVAL;
497     }
498 }
499 
500 static void ct3d_reg_write(void *opaque, hwaddr offset, uint64_t value,
501                            unsigned size)
502 {
503     CXLComponentState *cxl_cstate = opaque;
504     ComponentRegisters *cregs = &cxl_cstate->crb;
505     CXLType3Dev *ct3d = container_of(cxl_cstate, CXLType3Dev, cxl_cstate);
506     uint32_t *cache_mem = cregs->cache_mem_registers;
507     bool should_commit = false;
508     bool should_uncommit = false;
509     int which_hdm = -1;
510 
511     assert(size == 4);
512     g_assert(offset < CXL2_COMPONENT_CM_REGION_SIZE);
513 
514     switch (offset) {
515     case A_CXL_HDM_DECODER0_CTRL:
516         should_commit = FIELD_EX32(value, CXL_HDM_DECODER0_CTRL, COMMIT);
517         should_uncommit = !should_commit;
518         which_hdm = 0;
519         break;
520     case A_CXL_HDM_DECODER1_CTRL:
521         should_commit = FIELD_EX32(value, CXL_HDM_DECODER0_CTRL, COMMIT);
522         should_uncommit = !should_commit;
523         which_hdm = 1;
524         break;
525     case A_CXL_HDM_DECODER2_CTRL:
526         should_commit = FIELD_EX32(value, CXL_HDM_DECODER0_CTRL, COMMIT);
527         should_uncommit = !should_commit;
528         which_hdm = 2;
529         break;
530     case A_CXL_HDM_DECODER3_CTRL:
531         should_commit = FIELD_EX32(value, CXL_HDM_DECODER0_CTRL, COMMIT);
532         should_uncommit = !should_commit;
533         which_hdm = 3;
534         break;
535     case A_CXL_RAS_UNC_ERR_STATUS:
536     {
537         uint32_t capctrl = ldl_le_p(cache_mem + R_CXL_RAS_ERR_CAP_CTRL);
538         uint32_t fe = FIELD_EX32(capctrl, CXL_RAS_ERR_CAP_CTRL,
539                                  FIRST_ERROR_POINTER);
540         CXLError *cxl_err;
541         uint32_t unc_err;
542 
543         /*
544          * If a single bit is written that corresponds to clearing the
545          * first error pointer, update the status and the header log.
546          */
547         if (!QTAILQ_EMPTY(&ct3d->error_list)) {
548             if ((1 << fe) ^ value) {
549                 CXLError *cxl_next;
550                 /*
551                  * Software is using the wrong flow for multiple header
552                  * recording. Follow the behavior in PCIe r6.0 and assume
553                  * multiple header support. It is an implementation-defined
554                  * choice to clear all matching records if more than one bit
555                  * is set, which corresponds most closely to the behavior of
556                  * hardware not capable of multiple header recording.
557                  */
558                 QTAILQ_FOREACH_SAFE(cxl_err, &ct3d->error_list, node,
559                                     cxl_next) {
560                     if ((1 << cxl_err->type) & value) {
561                         QTAILQ_REMOVE(&ct3d->error_list, cxl_err, node);
562                         g_free(cxl_err);
563                     }
564                 }
565             } else {
566                 /* Done with previous FE, so drop from list */
567                 cxl_err = QTAILQ_FIRST(&ct3d->error_list);
568                 QTAILQ_REMOVE(&ct3d->error_list, cxl_err, node);
569                 g_free(cxl_err);
570             }
571 
572             /*
573              * If there is another FE, then put that in place and update
574              * the header log
575              */
576             if (!QTAILQ_EMPTY(&ct3d->error_list)) {
577                 uint32_t *header_log = &cache_mem[R_CXL_RAS_ERR_HEADER0];
578                 int i;
579 
580                 cxl_err = QTAILQ_FIRST(&ct3d->error_list);
581                 for (i = 0; i < CXL_RAS_ERR_HEADER_NUM; i++) {
582                     stl_le_p(header_log + i, cxl_err->header[i]);
583                 }
584                 capctrl = FIELD_DP32(capctrl, CXL_RAS_ERR_CAP_CTRL,
585                                      FIRST_ERROR_POINTER, cxl_err->type);
586             } else {
587                 /*
588                  * If there are no more errors, follow the recommendation
589                  * of PCIe r6.0 6.2.4.2 and set the first error pointer to
590                  * a status bit that will never be used.
591                  */
592                 capctrl = FIELD_DP32(capctrl, CXL_RAS_ERR_CAP_CTRL,
593                                      FIRST_ERROR_POINTER,
594                                      CXL_RAS_UNC_ERR_CXL_UNUSED);
595             }
596             stl_le_p((uint8_t *)cache_mem + A_CXL_RAS_ERR_CAP_CTRL, capctrl);
597         }
598         unc_err = 0;
599         QTAILQ_FOREACH(cxl_err, &ct3d->error_list, node) {
600             unc_err |= 1 << cxl_err->type;
601         }
602         stl_le_p((uint8_t *)cache_mem + offset, unc_err);
603 
604         return;
605     }
606     case A_CXL_RAS_COR_ERR_STATUS:
607     {
608         uint32_t rw1c = value;
609         uint32_t temp = ldl_le_p((uint8_t *)cache_mem + offset);
610         temp &= ~rw1c;
611         stl_le_p((uint8_t *)cache_mem + offset, temp);
612         return;
613     }
614     default:
615         break;
616     }
617 
618     stl_le_p((uint8_t *)cache_mem + offset, value);
619     if (should_commit) {
620         hdm_decoder_commit(ct3d, which_hdm);
621     } else if (should_uncommit) {
622         hdm_decoder_uncommit(ct3d, which_hdm);
623     }
624 }
625 
626 /*
627  * TODO: DC region configuration will be updated once host backend and address
628  * space support is added for DCD.
629  */
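/*
 * DC regions are carved out of the single dc.host_dc backend: the capacity is
 * split evenly across num_regions, the first region starts after any static
 * volatile and persistent capacity, and a per-region bitmap with one bit per
 * 2 MiB block tracks which blocks are currently backed by accepted extents.
 */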
630 static bool cxl_create_dc_regions(CXLType3Dev *ct3d, Error **errp)
631 {
632     int i;
633     uint64_t region_base = 0;
634     uint64_t region_len;
635     uint64_t decode_len;
636     uint64_t blk_size = 2 * MiB;
637     CXLDCRegion *region;
638     MemoryRegion *mr;
639     uint64_t dc_size;
640 
641     mr = host_memory_backend_get_memory(ct3d->dc.host_dc);
642     dc_size = memory_region_size(mr);
643     region_len = DIV_ROUND_UP(dc_size, ct3d->dc.num_regions);
644 
645     if (dc_size % (ct3d->dc.num_regions * CXL_CAPACITY_MULTIPLIER) != 0) {
646         error_setg(errp,
647                    "backend size is not a multiple of region length: 0x%" PRIx64,
648                    region_len);
649         return false;
650     }
651     if (region_len % CXL_CAPACITY_MULTIPLIER != 0) {
652         error_setg(errp, "DC region size is not aligned to 0x%" PRIx64,
653                    CXL_CAPACITY_MULTIPLIER);
654         return false;
655     }
656     decode_len = region_len;
657 
658     if (ct3d->hostvmem) {
659         mr = host_memory_backend_get_memory(ct3d->hostvmem);
660         region_base += memory_region_size(mr);
661     }
662     if (ct3d->hostpmem) {
663         mr = host_memory_backend_get_memory(ct3d->hostpmem);
664         region_base += memory_region_size(mr);
665     }
666     if (region_base % CXL_CAPACITY_MULTIPLIER != 0) {
667         error_setg(errp, "DC region base not aligned to 0x%" PRIx64,
668                    CXL_CAPACITY_MULTIPLIER);
669         return false;
670     }
671 
672     for (i = 0, region = &ct3d->dc.regions[0];
673          i < ct3d->dc.num_regions;
674          i++, region++, region_base += region_len) {
675         *region = (CXLDCRegion) {
676             .base = region_base,
677             .decode_len = decode_len,
678             .len = region_len,
679             .block_size = blk_size,
680             /* dsmad_handle set when creating CDAT table entries */
681             .flags = 0,
682         };
683         ct3d->dc.total_capacity += region->len;
684         region->blk_bitmap = bitmap_new(region->len / region->block_size);
685     }
686     QTAILQ_INIT(&ct3d->dc.extents);
687     QTAILQ_INIT(&ct3d->dc.extents_pending);
688 
689     return true;
690 }
691 
692 static void cxl_destroy_dc_regions(CXLType3Dev *ct3d)
693 {
694     CXLDCExtent *ent, *ent_next;
695     CXLDCExtentGroup *group, *group_next;
696     int i;
697     CXLDCRegion *region;
698 
699     QTAILQ_FOREACH_SAFE(ent, &ct3d->dc.extents, node, ent_next) {
700         cxl_remove_extent_from_extent_list(&ct3d->dc.extents, ent);
701     }
702 
703     QTAILQ_FOREACH_SAFE(group, &ct3d->dc.extents_pending, node, group_next) {
704         QTAILQ_REMOVE(&ct3d->dc.extents_pending, group, node);
705         QTAILQ_FOREACH_SAFE(ent, &group->list, node, ent_next) {
706             cxl_remove_extent_from_extent_list(&group->list, ent);
707         }
708         g_free(group);
709     }
710 
711     for (i = 0; i < ct3d->dc.num_regions; i++) {
712         region = &ct3d->dc.regions[i];
713         g_free(region->blk_bitmap);
714     }
715 }
716 
717 static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
718 {
719     DeviceState *ds = DEVICE(ct3d);
720 
721     if (!ct3d->hostmem && !ct3d->hostvmem && !ct3d->hostpmem
722         && !ct3d->dc.num_regions) {
723         error_setg(errp, "at least one memdev property must be set");
724         return false;
725     } else if (ct3d->hostmem && ct3d->hostpmem) {
726         error_setg(errp, "[memdev] cannot be used with new "
727                          "[persistent-memdev] property");
728         return false;
729     } else if (ct3d->hostmem) {
730         /* Use of hostmem property implies pmem */
731         ct3d->hostpmem = ct3d->hostmem;
732         ct3d->hostmem = NULL;
733     }
734 
735     if (ct3d->hostpmem && !ct3d->lsa) {
736         error_setg(errp, "lsa property must be set for persistent devices");
737         return false;
738     }
739 
740     if (ct3d->hostvmem) {
741         MemoryRegion *vmr;
742         char *v_name;
743 
744         vmr = host_memory_backend_get_memory(ct3d->hostvmem);
745         if (!vmr) {
746             error_setg(errp, "volatile memdev must have backing device");
747             return false;
748         }
749         if (host_memory_backend_is_mapped(ct3d->hostvmem)) {
750             error_setg(errp, "memory backend %s can't be used multiple times.",
751                object_get_canonical_path_component(OBJECT(ct3d->hostvmem)));
752             return false;
753         }
754         memory_region_set_nonvolatile(vmr, false);
755         memory_region_set_enabled(vmr, true);
756         host_memory_backend_set_mapped(ct3d->hostvmem, true);
757         if (ds->id) {
758             v_name = g_strdup_printf("cxl-type3-dpa-vmem-space:%s", ds->id);
759         } else {
760             v_name = g_strdup("cxl-type3-dpa-vmem-space");
761         }
762         address_space_init(&ct3d->hostvmem_as, vmr, v_name);
763         ct3d->cxl_dstate.vmem_size = memory_region_size(vmr);
764         ct3d->cxl_dstate.static_mem_size += memory_region_size(vmr);
765         g_free(v_name);
766     }
767 
768     if (ct3d->hostpmem) {
769         MemoryRegion *pmr;
770         char *p_name;
771 
772         pmr = host_memory_backend_get_memory(ct3d->hostpmem);
773         if (!pmr) {
774             error_setg(errp, "persistent memdev must have backing device");
775             return false;
776         }
777         if (host_memory_backend_is_mapped(ct3d->hostpmem)) {
778             error_setg(errp, "memory backend %s can't be used multiple times.",
779                object_get_canonical_path_component(OBJECT(ct3d->hostpmem)));
780             return false;
781         }
782         memory_region_set_nonvolatile(pmr, true);
783         memory_region_set_enabled(pmr, true);
784         host_memory_backend_set_mapped(ct3d->hostpmem, true);
785         if (ds->id) {
786             p_name = g_strdup_printf("cxl-type3-dpa-pmem-space:%s", ds->id);
787         } else {
788             p_name = g_strdup("cxl-type3-dpa-pmem-space");
789         }
790         address_space_init(&ct3d->hostpmem_as, pmr, p_name);
791         ct3d->cxl_dstate.pmem_size = memory_region_size(pmr);
792         ct3d->cxl_dstate.static_mem_size += memory_region_size(pmr);
793         g_free(p_name);
794     }
795 
796     ct3d->dc.total_capacity = 0;
797     if (ct3d->dc.num_regions > 0) {
798         MemoryRegion *dc_mr;
799         char *dc_name;
800 
801         if (!ct3d->dc.host_dc) {
802             error_setg(errp, "dynamic capacity must have a backing device");
803             return false;
804         }
805 
806         dc_mr = host_memory_backend_get_memory(ct3d->dc.host_dc);
807         if (!dc_mr) {
808             error_setg(errp, "dynamic capacity must have a backing device");
809             return false;
810         }
811 
812         if (host_memory_backend_is_mapped(ct3d->dc.host_dc)) {
813             error_setg(errp, "memory backend %s can't be used multiple times.",
814                object_get_canonical_path_component(OBJECT(ct3d->dc.host_dc)));
815             return false;
816         }
817         /*
818          * Set DC regions as volatile for now, non-volatile support can
819          * be added in the future if needed.
820          */
821         memory_region_set_nonvolatile(dc_mr, false);
822         memory_region_set_enabled(dc_mr, true);
823         host_memory_backend_set_mapped(ct3d->dc.host_dc, true);
824         if (ds->id) {
825             dc_name = g_strdup_printf("cxl-dcd-dpa-dc-space:%s", ds->id);
826         } else {
827             dc_name = g_strdup("cxl-dcd-dpa-dc-space");
828         }
829         address_space_init(&ct3d->dc.host_dc_as, dc_mr, dc_name);
830         g_free(dc_name);
831 
832         if (!cxl_create_dc_regions(ct3d, errp)) {
833             error_append_hint(errp, "setting up DC regions failed\n");
834             return false;
835         }
836     }
837 
838     return true;
839 }
840 
841 static DOEProtocol doe_cdat_prot[] = {
842     { CXL_VENDOR_ID, CXL_DOE_TABLE_ACCESS, cxl_doe_cdat_rsp },
843     { }
844 };
845 
846 static void ct3_realize(PCIDevice *pci_dev, Error **errp)
847 {
848     ERRP_GUARD();
849     CXLType3Dev *ct3d = CXL_TYPE3(pci_dev);
850     CXLComponentState *cxl_cstate = &ct3d->cxl_cstate;
851     ComponentRegisters *regs = &cxl_cstate->crb;
852     MemoryRegion *mr = &regs->component_registers;
853     uint8_t *pci_conf = pci_dev->config;
854     int i, rc;
855     uint16_t count;
856 
857     QTAILQ_INIT(&ct3d->error_list);
858 
859     if (!cxl_setup_memory(ct3d, errp)) {
860         return;
861     }
862 
863     pci_config_set_prog_interface(pci_conf, 0x10);
864 
865     pcie_endpoint_cap_init(pci_dev, 0x80);
866     if (ct3d->sn != UI64_NULL) {
867         pcie_dev_ser_num_init(pci_dev, 0x100, ct3d->sn);
868         cxl_cstate->dvsec_offset = 0x100 + 0x0c;
869     } else {
870         cxl_cstate->dvsec_offset = 0x100;
871     }
872 
873     ct3d->cxl_cstate.pdev = pci_dev;
874     build_dvsecs(ct3d);
875 
876     regs->special_ops = g_new0(MemoryRegionOps, 1);
877     regs->special_ops->write = ct3d_reg_write;
878 
879     cxl_component_register_block_init(OBJECT(pci_dev), cxl_cstate,
880                                       TYPE_CXL_TYPE3);
881 
882     pci_register_bar(
883         pci_dev, CXL_COMPONENT_REG_BAR_IDX,
884         PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64, mr);
885 
886     cxl_device_register_block_init(OBJECT(pci_dev), &ct3d->cxl_dstate,
887                                    &ct3d->cci);
888     pci_register_bar(pci_dev, CXL_DEVICE_REG_BAR_IDX,
889                      PCI_BASE_ADDRESS_SPACE_MEMORY |
890                          PCI_BASE_ADDRESS_MEM_TYPE_64,
891                      &ct3d->cxl_dstate.device_registers);
892 
893     /* MSI(-X) Initialization */
894     rc = msix_init_exclusive_bar(pci_dev, CXL_T3_MSIX_VECTOR_NR, 4, errp);
895     if (rc) {
896         goto err_free_special_ops;
897     }
898     for (i = 0; i < CXL_T3_MSIX_VECTOR_NR; i++) {
899         msix_vector_use(pci_dev, i);
900     }
901 
902     /* DOE Initialization */
903     pcie_doe_init(pci_dev, &ct3d->doe_cdat, 0x190, doe_cdat_prot, true,
904                   CXL_T3_MSIX_PCIE_DOE_TABLE_ACCESS);
905 
906     cxl_cstate->cdat.build_cdat_table = ct3_build_cdat_table;
907     cxl_cstate->cdat.free_cdat_table = ct3_free_cdat_table;
908     cxl_cstate->cdat.private = ct3d;
909     if (!cxl_doe_cdat_init(cxl_cstate, errp)) {
910         goto err_msix_uninit;
911     }
912 
913     pcie_cap_deverr_init(pci_dev);
914     /* Leave a bit of room for expansion */
915     rc = pcie_aer_init(pci_dev, PCI_ERR_VER, 0x200, PCI_ERR_SIZEOF, errp);
916     if (rc) {
917         goto err_release_cdat;
918     }
919     cxl_event_init(&ct3d->cxl_dstate, CXL_T3_MSIX_EVENT_START);
920 
921     /* Set default value for patrol scrub attributes */
922     ct3d->patrol_scrub_attrs.scrub_cycle_cap =
923                            CXL_MEMDEV_PS_SCRUB_CYCLE_CHANGE_CAP_DEFAULT |
924                            CXL_MEMDEV_PS_SCRUB_REALTIME_REPORT_CAP_DEFAULT;
925     ct3d->patrol_scrub_attrs.scrub_cycle =
926                            CXL_MEMDEV_PS_CUR_SCRUB_CYCLE_DEFAULT |
927                            (CXL_MEMDEV_PS_MIN_SCRUB_CYCLE_DEFAULT << 8);
928     ct3d->patrol_scrub_attrs.scrub_flags = CXL_MEMDEV_PS_ENABLE_DEFAULT;
929 
930     /* Set default value for DDR5 ECS read attributes */
931     ct3d->ecs_attrs.ecs_log_cap = CXL_ECS_LOG_ENTRY_TYPE_DEFAULT;
932     for (count = 0; count < CXL_ECS_NUM_MEDIA_FRUS; count++) {
933         ct3d->ecs_attrs.fru_attrs[count].ecs_cap =
934                             CXL_ECS_REALTIME_REPORT_CAP_DEFAULT;
935         ct3d->ecs_attrs.fru_attrs[count].ecs_config =
936                             CXL_ECS_THRESHOLD_COUNT_DEFAULT |
937                             (CXL_ECS_MODE_DEFAULT << 3);
938         /* Reserved */
939         ct3d->ecs_attrs.fru_attrs[count].ecs_flags = 0;
940     }
941 
942     return;
943 
944 err_release_cdat:
945     cxl_doe_cdat_release(cxl_cstate);
946 err_msix_uninit:
947     msix_uninit_exclusive_bar(pci_dev);
948 err_free_special_ops:
949     g_free(regs->special_ops);
950     if (ct3d->dc.host_dc) {
951         cxl_destroy_dc_regions(ct3d);
952         address_space_destroy(&ct3d->dc.host_dc_as);
953     }
954     if (ct3d->hostpmem) {
955         address_space_destroy(&ct3d->hostpmem_as);
956     }
957     if (ct3d->hostvmem) {
958         address_space_destroy(&ct3d->hostvmem_as);
959     }
960     return;
961 }
962 
963 static void ct3_exit(PCIDevice *pci_dev)
964 {
965     CXLType3Dev *ct3d = CXL_TYPE3(pci_dev);
966     CXLComponentState *cxl_cstate = &ct3d->cxl_cstate;
967     ComponentRegisters *regs = &cxl_cstate->crb;
968 
969     pcie_aer_exit(pci_dev);
970     cxl_doe_cdat_release(cxl_cstate);
971     msix_uninit_exclusive_bar(pci_dev);
972     g_free(regs->special_ops);
973     if (ct3d->dc.host_dc) {
974         cxl_destroy_dc_regions(ct3d);
975         address_space_destroy(&ct3d->dc.host_dc_as);
976     }
977     if (ct3d->hostpmem) {
978         address_space_destroy(&ct3d->hostpmem_as);
979     }
980     if (ct3d->hostvmem) {
981         address_space_destroy(&ct3d->hostvmem_as);
982     }
983 }
984 
985 /*
986  * Mark the DPA range [dpa, dpa + len - 1] to be backed and accessible. This
987  * happens when a DC extent is added and accepted by the host.
988  */
989 void ct3_set_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
990                                  uint64_t len)
991 {
992     CXLDCRegion *region;
993 
994     region = cxl_find_dc_region(ct3d, dpa, len);
995     if (!region) {
996         return;
997     }
998 
999     bitmap_set(region->blk_bitmap, (dpa - region->base) / region->block_size,
1000                len / region->block_size);
1001 }
1002 
1003 /*
1004  * Check whether the DPA range [dpa, dpa + len - 1] is backed with DC extents.
1005  * Used when validating reads/writes to DC regions.
1006  */
1007 bool ct3_test_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
1008                                   uint64_t len)
1009 {
1010     CXLDCRegion *region;
1011     uint64_t nbits;
1012     long nr;
1013 
1014     region = cxl_find_dc_region(ct3d, dpa, len);
1015     if (!region) {
1016         return false;
1017     }
1018 
1019     nr = (dpa - region->base) / region->block_size;
1020     nbits = DIV_ROUND_UP(len, region->block_size);
1021     /*
1022      * If the bits covering [dpa, dpa + len) are all 1s, the DPA range is
1023      * backed with DC extents, so return true; otherwise return false.
1024      */
1025     return find_next_zero_bit(region->blk_bitmap, nr + nbits, nr) == nr + nbits;
1026 }
1027 
1028 /*
1029  * Mark the DPA range [dpa, dpa + len - 1] to be unbacked and inaccessible.
1030  * This happens when a DC extent is released by the host.
1031  */
1032 void ct3_clear_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
1033                                    uint64_t len)
1034 {
1035     CXLDCRegion *region;
1036     uint64_t nbits;
1037     long nr;
1038 
1039     region = cxl_find_dc_region(ct3d, dpa, len);
1040     if (!region) {
1041         return;
1042     }
1043 
1044     nr = (dpa - region->base) / region->block_size;
1045     nbits = len / region->block_size;
1046     bitmap_clear(region->blk_bitmap, nr, nbits);
1047 }
1048 
1049 static bool cxl_type3_dpa(CXLType3Dev *ct3d, hwaddr host_addr, uint64_t *dpa)
1050 {
1051     int hdm_inc = R_CXL_HDM_DECODER1_BASE_LO - R_CXL_HDM_DECODER0_BASE_LO;
1052     uint32_t *cache_mem = ct3d->cxl_cstate.crb.cache_mem_registers;
1053     unsigned int hdm_count;
1054     uint32_t cap;
1055     uint64_t dpa_base = 0;
1056     int i;
1057 
1058     cap = ldl_le_p(cache_mem + R_CXL_HDM_DECODER_CAPABILITY);
1059     hdm_count = cxl_decoder_count_dec(FIELD_EX32(cap,
1060                                                  CXL_HDM_DECODER_CAPABILITY,
1061                                                  DECODER_COUNT));
1062 
1063     for (i = 0; i < hdm_count; i++) {
1064         uint64_t decoder_base, decoder_size, hpa_offset, skip;
1065         uint32_t hdm_ctrl, low, high;
1066         int ig, iw;
1067 
1068         low = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_BASE_LO + i * hdm_inc);
1069         high = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_BASE_HI + i * hdm_inc);
1070         decoder_base = ((uint64_t)high << 32) | (low & 0xf0000000);
1071 
1072         low = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_SIZE_LO + i * hdm_inc);
1073         high = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_SIZE_HI + i * hdm_inc);
1074         decoder_size = ((uint64_t)high << 32) | (low & 0xf0000000);
1075 
1076         low = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_DPA_SKIP_LO +
1077                        i * hdm_inc);
1078         high = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_DPA_SKIP_HI +
1079                         i * hdm_inc);
1080         skip = ((uint64_t)high << 32) | (low & 0xf0000000);
1081         dpa_base += skip;
1082 
1083         hpa_offset = (uint64_t)host_addr - decoder_base;
1084 
1085         hdm_ctrl = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_CTRL + i * hdm_inc);
1086         iw = FIELD_EX32(hdm_ctrl, CXL_HDM_DECODER0_CTRL, IW);
1087         ig = FIELD_EX32(hdm_ctrl, CXL_HDM_DECODER0_CTRL, IG);
1088         if (!FIELD_EX32(hdm_ctrl, CXL_HDM_DECODER0_CTRL, COMMITTED)) {
1089             return false;
1090         }
1091         if (((uint64_t)host_addr < decoder_base) ||
1092             (hpa_offset >= decoder_size)) {
1093             int decoded_iw = cxl_interleave_ways_dec(iw, &error_fatal);
1094 
1095             if (decoded_iw == 0) {
1096                 return false;
1097             }
1098 
1099             dpa_base += decoder_size / decoded_iw;
1100             continue;
1101         }
1102 
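        /*
         * Strip the interleave selector bits out of the HPA offset to form a
         * DPA offset: the low (8 + ig) bits pass through and, for the
         * power-of-2 ways encodings (iw < 8), the bits above the selector
         * shift down by iw.  For example, with ig = 0 (256 B granules) and
         * iw = 1 (2-way), HPA offset 0x300 keeps 0x00 from its low 8 bits
         * and 0x200 >> 1 from the upper bits, giving DPA offset 0x100.  The
         * iw >= 8 encodings (3, 6 and 12-way) additionally divide the upper
         * bits by 3.
         */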
1103         if (iw < 8) {
1104             *dpa = dpa_base +
1105                 ((MAKE_64BIT_MASK(0, 8 + ig) & hpa_offset) |
1106                  ((MAKE_64BIT_MASK(8 + ig + iw, 64 - 8 - ig - iw) & hpa_offset)
1107                   >> iw));
1108         } else {
1109             *dpa = dpa_base +
1110                 ((MAKE_64BIT_MASK(0, 8 + ig) & hpa_offset) |
1111                  ((((MAKE_64BIT_MASK(ig + iw, 64 - ig - iw) & hpa_offset)
1112                    >> (ig + iw)) / 3) << (ig + 8)));
1113         }
1114 
1115         return true;
1116     }
1117     return false;
1118 }
1119 
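/*
 * The flat DPA space is ordered volatile, then persistent, then dynamic
 * capacity, matching the layout used when the DVSEC ranges, CDAT table and
 * DC regions were built, so the translated offset selects an address space
 * by simple range comparison and is rebased to that backend's origin.
 */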
1120 static int cxl_type3_hpa_to_as_and_dpa(CXLType3Dev *ct3d,
1121                                        hwaddr host_addr,
1122                                        unsigned int size,
1123                                        AddressSpace **as,
1124                                        uint64_t *dpa_offset)
1125 {
1126     MemoryRegion *vmr = NULL, *pmr = NULL, *dc_mr = NULL;
1127     uint64_t vmr_size = 0, pmr_size = 0, dc_size = 0;
1128 
1129     if (ct3d->hostvmem) {
1130         vmr = host_memory_backend_get_memory(ct3d->hostvmem);
1131         vmr_size = memory_region_size(vmr);
1132     }
1133     if (ct3d->hostpmem) {
1134         pmr = host_memory_backend_get_memory(ct3d->hostpmem);
1135         pmr_size = memory_region_size(pmr);
1136     }
1137     if (ct3d->dc.host_dc) {
1138         dc_mr = host_memory_backend_get_memory(ct3d->dc.host_dc);
1139         dc_size = memory_region_size(dc_mr);
1140     }
1141 
1142     if (!vmr && !pmr && !dc_mr) {
1143         return -ENODEV;
1144     }
1145 
1146     if (!cxl_type3_dpa(ct3d, host_addr, dpa_offset)) {
1147         return -EINVAL;
1148     }
1149 
1150     if (*dpa_offset >= vmr_size + pmr_size + dc_size) {
1151         return -EINVAL;
1152     }
1153 
1154     if (*dpa_offset < vmr_size) {
1155         *as = &ct3d->hostvmem_as;
1156     } else if (*dpa_offset < vmr_size + pmr_size) {
1157         *as = &ct3d->hostpmem_as;
1158         *dpa_offset -= vmr_size;
1159     } else {
1160         if (!ct3_test_region_block_backed(ct3d, *dpa_offset, size)) {
1161             return -ENODEV;
1162         }
1163 
1164         *as = &ct3d->dc.host_dc_as;
1165         *dpa_offset -= (vmr_size + pmr_size);
1166     }
1167 
1168     return 0;
1169 }
1170 
1171 MemTxResult cxl_type3_read(PCIDevice *d, hwaddr host_addr, uint64_t *data,
1172                            unsigned size, MemTxAttrs attrs)
1173 {
1174     CXLType3Dev *ct3d = CXL_TYPE3(d);
1175     uint64_t dpa_offset = 0;
1176     AddressSpace *as = NULL;
1177     int res;
1178 
1179     res = cxl_type3_hpa_to_as_and_dpa(ct3d, host_addr, size,
1180                                       &as, &dpa_offset);
1181     if (res) {
1182         return MEMTX_ERROR;
1183     }
1184 
1185     if (cxl_dev_media_disabled(&ct3d->cxl_dstate)) {
1186         qemu_guest_getrandom_nofail(data, size);
1187         return MEMTX_OK;
1188     }
1189 
1190     return address_space_read(as, dpa_offset, attrs, data, size);
1191 }
1192 
1193 MemTxResult cxl_type3_write(PCIDevice *d, hwaddr host_addr, uint64_t data,
1194                             unsigned size, MemTxAttrs attrs)
1195 {
1196     CXLType3Dev *ct3d = CXL_TYPE3(d);
1197     uint64_t dpa_offset = 0;
1198     AddressSpace *as = NULL;
1199     int res;
1200 
1201     res = cxl_type3_hpa_to_as_and_dpa(ct3d, host_addr, size,
1202                                       &as, &dpa_offset);
1203     if (res) {
1204         return MEMTX_ERROR;
1205     }
1206 
1207     if (cxl_dev_media_disabled(&ct3d->cxl_dstate)) {
1208         return MEMTX_OK;
1209     }
1210 
1211     return address_space_write(as, dpa_offset, attrs, &data, size);
1212 }
1213 
1214 static void ct3d_reset(DeviceState *dev)
1215 {
1216     CXLType3Dev *ct3d = CXL_TYPE3(dev);
1217     uint32_t *reg_state = ct3d->cxl_cstate.crb.cache_mem_registers;
1218     uint32_t *write_msk = ct3d->cxl_cstate.crb.cache_mem_regs_write_mask;
1219 
1220     pcie_cap_fill_link_ep_usp(PCI_DEVICE(dev), ct3d->width, ct3d->speed);
1221     cxl_component_register_init_common(reg_state, write_msk, CXL2_TYPE3_DEVICE);
1222     cxl_device_register_init_t3(ct3d, CXL_T3_MSIX_MBOX);
1223 
1224     /*
1225      * Bring up an endpoint to target with MCTP over VDM.
1226      * This device emulates an MLD with a single LD for now.
1227      */
1228     cxl_initialize_t3_fm_owned_ld_mctpcci(&ct3d->vdm_fm_owned_ld_mctp_cci,
1229                                           DEVICE(ct3d), DEVICE(ct3d),
1230                                           512); /* Max payload made up */
1231     cxl_initialize_t3_ld_cci(&ct3d->ld0_cci, DEVICE(ct3d), DEVICE(ct3d),
1232                              512); /* Max payload made up */
1233 
1234 }
1235 
1236 static const Property ct3_props[] = {
1237     DEFINE_PROP_LINK("memdev", CXLType3Dev, hostmem, TYPE_MEMORY_BACKEND,
1238                      HostMemoryBackend *), /* for backward compatibility */
1239     DEFINE_PROP_LINK("persistent-memdev", CXLType3Dev, hostpmem,
1240                      TYPE_MEMORY_BACKEND, HostMemoryBackend *),
1241     DEFINE_PROP_LINK("volatile-memdev", CXLType3Dev, hostvmem,
1242                      TYPE_MEMORY_BACKEND, HostMemoryBackend *),
1243     DEFINE_PROP_LINK("lsa", CXLType3Dev, lsa, TYPE_MEMORY_BACKEND,
1244                      HostMemoryBackend *),
1245     DEFINE_PROP_UINT64("sn", CXLType3Dev, sn, UI64_NULL),
1246     DEFINE_PROP_STRING("cdat", CXLType3Dev, cxl_cstate.cdat.filename),
1247     DEFINE_PROP_UINT8("num-dc-regions", CXLType3Dev, dc.num_regions, 0),
1248     DEFINE_PROP_LINK("volatile-dc-memdev", CXLType3Dev, dc.host_dc,
1249                      TYPE_MEMORY_BACKEND, HostMemoryBackend *),
1250     DEFINE_PROP_PCIE_LINK_SPEED("x-speed", CXLType3Dev,
1251                                 speed, PCIE_LINK_SPEED_32),
1252     DEFINE_PROP_PCIE_LINK_WIDTH("x-width", CXLType3Dev,
1253                                 width, PCIE_LINK_WIDTH_16),
1254 };
1255 
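/*
 * LSA (Label Storage Area) accessors: the memdev named by the "lsa" property
 * is exposed as a flat byte array, apparently for the mailbox Get/Set LSA
 * commands to read and write namespace labels, much like an NVDIMM label
 * area.
 */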
1256 static uint64_t get_lsa_size(CXLType3Dev *ct3d)
1257 {
1258     MemoryRegion *mr;
1259 
1260     if (!ct3d->lsa) {
1261         return 0;
1262     }
1263 
1264     mr = host_memory_backend_get_memory(ct3d->lsa);
1265     return memory_region_size(mr);
1266 }
1267 
1268 static void validate_lsa_access(MemoryRegion *mr, uint64_t size,
1269                                 uint64_t offset)
1270 {
1271     assert(offset + size <= memory_region_size(mr));
1272     assert(offset + size > offset);
1273 }
1274 
1275 static uint64_t get_lsa(CXLType3Dev *ct3d, void *buf, uint64_t size,
1276                     uint64_t offset)
1277 {
1278     MemoryRegion *mr;
1279     void *lsa;
1280 
1281     if (!ct3d->lsa) {
1282         return 0;
1283     }
1284 
1285     mr = host_memory_backend_get_memory(ct3d->lsa);
1286     validate_lsa_access(mr, size, offset);
1287 
1288     lsa = memory_region_get_ram_ptr(mr) + offset;
1289     memcpy(buf, lsa, size);
1290 
1291     return size;
1292 }
1293 
1294 static void set_lsa(CXLType3Dev *ct3d, const void *buf, uint64_t size,
1295                     uint64_t offset)
1296 {
1297     MemoryRegion *mr;
1298     void *lsa;
1299 
1300     if (!ct3d->lsa) {
1301         return;
1302     }
1303 
1304     mr = host_memory_backend_get_memory(ct3d->lsa);
1305     validate_lsa_access(mr, size, offset);
1306 
1307     lsa = memory_region_get_ram_ptr(mr) + offset;
1308     memcpy(lsa, buf, size);
1309     memory_region_set_dirty(mr, offset, size);
1310 
1311     /*
1312      * As with the PMEM, label updates will be lost if the guest is not
1313      * shut down gracefully.
1314      */
1315 }
1316 
1317 static bool set_cacheline(CXLType3Dev *ct3d, uint64_t dpa_offset, uint8_t *data)
1318 {
1319     MemoryRegion *vmr = NULL, *pmr = NULL, *dc_mr = NULL;
1320     AddressSpace *as;
1321     uint64_t vmr_size = 0, pmr_size = 0, dc_size = 0;
1322 
1323     if (ct3d->hostvmem) {
1324         vmr = host_memory_backend_get_memory(ct3d->hostvmem);
1325         vmr_size = memory_region_size(vmr);
1326     }
1327     if (ct3d->hostpmem) {
1328         pmr = host_memory_backend_get_memory(ct3d->hostpmem);
1329         pmr_size = memory_region_size(pmr);
1330     }
1331     if (ct3d->dc.host_dc) {
1332         dc_mr = host_memory_backend_get_memory(ct3d->dc.host_dc);
1333         dc_size = memory_region_size(dc_mr);
1334     }
1335 
1336     if (!vmr && !pmr && !dc_mr) {
1337         return false;
1338     }
1339 
1340     if (dpa_offset + CXL_CACHE_LINE_SIZE > vmr_size + pmr_size + dc_size) {
1341         return false;
1342     }
1343 
1344     if (dpa_offset < vmr_size) {
1345         as = &ct3d->hostvmem_as;
1346     } else if (dpa_offset < vmr_size + pmr_size) {
1347         as = &ct3d->hostpmem_as;
1348         dpa_offset -= vmr_size;
1349     } else {
1350         as = &ct3d->dc.host_dc_as;
1351         dpa_offset -= (vmr_size + pmr_size);
1352     }
1353 
1354     address_space_write(as, dpa_offset, MEMTXATTRS_UNSPECIFIED, data,
1355                         CXL_CACHE_LINE_SIZE);
1356     return true;
1357 }
1358 
1359 void cxl_set_poison_list_overflowed(CXLType3Dev *ct3d)
1360 {
1361     ct3d->poison_list_overflowed = true;
1362     ct3d->poison_list_overflow_ts =
1363         cxl_device_get_timestamp(&ct3d->cxl_dstate);
1364 }
1365 
1366 void cxl_clear_poison_list_overflowed(CXLType3Dev *ct3d)
1367 {
1368     ct3d->poison_list_overflowed = false;
1369     ct3d->poison_list_overflow_ts = 0;
1370 }
1371 
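/*
 * Sketch of driving this handler over QMP (the device path here is
 * hypothetical):
 *   { "execute": "cxl-inject-poison",
 *     "arguments": { "path": "/machine/peripheral/cxl-mem0",
 *                    "start": 2048, "length": 256 } }
 * Both start and length must be multiples of 64 bytes or the checks below
 * reject the request.
 */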
1372 void qmp_cxl_inject_poison(const char *path, uint64_t start, uint64_t length,
1373                            Error **errp)
1374 {
1375     Object *obj = object_resolve_path(path, NULL);
1376     CXLType3Dev *ct3d;
1377     CXLPoison *p;
1378 
1379     if (length % 64) {
1380         error_setg(errp, "Poison injection must be in multiples of 64 bytes");
1381         return;
1382     }
1383     if (start % 64) {
1384         error_setg(errp, "Poison start address must be 64 byte aligned");
1385         return;
1386     }
1387     if (!obj) {
1388         error_setg(errp, "Unable to resolve path");
1389         return;
1390     }
1391     if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) {
1392         error_setg(errp, "Path does not point to a CXL type 3 device");
1393         return;
1394     }
1395 
1396     ct3d = CXL_TYPE3(obj);
1397 
1398     QLIST_FOREACH(p, &ct3d->poison_list, node) {
1399         if ((start < p->start + p->length) && (start + length > p->start)) {
1400             error_setg(errp,
1401                        "Overlap with existing poisoned region not supported");
1402             return;
1403         }
1404     }
1405 
1406     p = g_new0(CXLPoison, 1);
1407     p->length = length;
1408     p->start = start;
1409     /* Different from injected via the mbox */
1410     p->type = CXL_POISON_TYPE_INTERNAL;
1411 
1412     if (ct3d->poison_list_cnt < CXL_POISON_LIST_LIMIT) {
1413         QLIST_INSERT_HEAD(&ct3d->poison_list, p, node);
1414         ct3d->poison_list_cnt++;
1415     } else {
1416         if (!ct3d->poison_list_overflowed) {
1417             cxl_set_poison_list_overflowed(ct3d);
1418         }
1419         QLIST_INSERT_HEAD(&ct3d->poison_list_bkp, p, node);
1420     }
1421 }
1422 
1423 /* For uncorrectable errors, include support for multiple header recording */
1424 void qmp_cxl_inject_uncorrectable_errors(const char *path,
1425                                          CXLUncorErrorRecordList *errors,
1426                                          Error **errp)
1427 {
1428     Object *obj = object_resolve_path(path, NULL);
1429     static PCIEAERErr err = {};
1430     CXLType3Dev *ct3d;
1431     CXLError *cxl_err;
1432     uint32_t *reg_state;
1433     uint32_t unc_err;
1434     bool first;
1435 
1436     if (!obj) {
1437         error_setg(errp, "Unable to resolve path");
1438         return;
1439     }
1440 
1441     if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) {
1442         error_setg(errp, "Path does not point to a CXL type 3 device");
1443         return;
1444     }
1445 
1446     err.status = PCI_ERR_UNC_INTN;
1447     err.source_id = pci_requester_id(PCI_DEVICE(obj));
1448     err.flags = 0;
1449 
1450     ct3d = CXL_TYPE3(obj);
1451 
1452     first = QTAILQ_EMPTY(&ct3d->error_list);
1453     reg_state = ct3d->cxl_cstate.crb.cache_mem_registers;
1454     while (errors) {
1455         uint32List *header = errors->value->header;
1456         uint8_t header_count = 0;
1457         int cxl_err_code;
1458 
1459         cxl_err_code = ct3d_qmp_uncor_err_to_cxl(errors->value->type);
1460         if (cxl_err_code < 0) {
1461             error_setg(errp, "Unknown error code");
1462             return;
1463         }
1464 
1465         /* If the error is masked, nothing to do here */
1466         if (!((1 << cxl_err_code) &
1467               ~ldl_le_p(reg_state + R_CXL_RAS_UNC_ERR_MASK))) {
1468             errors = errors->next;
1469             continue;
1470         }
1471 
1472         cxl_err = g_malloc0(sizeof(*cxl_err));
1473 
1474         cxl_err->type = cxl_err_code;
1475         while (header && header_count < 32) {
1476             cxl_err->header[header_count++] = header->value;
1477             header = header->next;
1478         }
1479         if (header) { /* more than 32 header DWORDs were supplied */
1480             error_setg(errp, "Header must be 32 DWORD or less");
1481             g_free(cxl_err);
1482             return;
1483         }
1484         QTAILQ_INSERT_TAIL(&ct3d->error_list, cxl_err, node);
1485         errors = errors->next;
1486     }
1487 
1488     if (first && !QTAILQ_EMPTY(&ct3d->error_list)) {
1489         uint32_t *cache_mem = ct3d->cxl_cstate.crb.cache_mem_registers;
1490         uint32_t capctrl = ldl_le_p(cache_mem + R_CXL_RAS_ERR_CAP_CTRL);
1491         uint32_t *header_log = &cache_mem[R_CXL_RAS_ERR_HEADER0];
1492         int i;
1493 
1494         cxl_err = QTAILQ_FIRST(&ct3d->error_list);
1495         for (i = 0; i < CXL_RAS_ERR_HEADER_NUM; i++) {
1496             stl_le_p(header_log + i, cxl_err->header[i]);
1497         }
1498 
1499         capctrl = FIELD_DP32(capctrl, CXL_RAS_ERR_CAP_CTRL,
1500                              FIRST_ERROR_POINTER, cxl_err->type);
1501         stl_le_p(cache_mem + R_CXL_RAS_ERR_CAP_CTRL, capctrl);
1502     }
1503 
1504     unc_err = 0;
1505     QTAILQ_FOREACH(cxl_err, &ct3d->error_list, node) {
1506         unc_err |= (1 << cxl_err->type);
1507     }
1508     if (!unc_err) {
1509         return;
1510     }
1511 
1512     stl_le_p(reg_state + R_CXL_RAS_UNC_ERR_STATUS, unc_err);
1513     pcie_aer_inject_error(PCI_DEVICE(obj), &err);
1514 
1515     return;
1516 }
1517 
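/*
 * Example QMP invocation, assuming the QAPI enum value corresponding to
 * CXL_COR_ERROR_TYPE_MEM_DATA_ECC is spelled "mem-data-ecc" and using a
 * hypothetical device path:
 *   { "execute": "cxl-inject-correctable-error",
 *     "arguments": { "path": "/machine/peripheral/cxl-mem0",
 *                    "type": "mem-data-ecc" } }
 */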
1518 void qmp_cxl_inject_correctable_error(const char *path, CxlCorErrorType type,
1519                                       Error **errp)
1520 {
1521     static PCIEAERErr err = {};
1522     Object *obj = object_resolve_path(path, NULL);
1523     CXLType3Dev *ct3d;
1524     uint32_t *reg_state;
1525     uint32_t cor_err;
1526     int cxl_err_type;
1527 
1528     if (!obj) {
1529         error_setg(errp, "Unable to resolve path");
1530         return;
1531     }
1532     if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) {
1533         error_setg(errp, "Path does not point to a CXL type 3 device");
1534         return;
1535     }
1536 
1537     err.status = PCI_ERR_COR_INTERNAL;
1538     err.source_id = pci_requester_id(PCI_DEVICE(obj));
1539     err.flags = PCIE_AER_ERR_IS_CORRECTABLE;
1540 
1541     ct3d = CXL_TYPE3(obj);
1542     reg_state = ct3d->cxl_cstate.crb.cache_mem_registers;
1543     cor_err = ldl_le_p(reg_state + R_CXL_RAS_COR_ERR_STATUS);
1544 
1545     cxl_err_type = ct3d_qmp_cor_err_to_cxl(type);
1546     if (cxl_err_type < 0) {
1547         error_setg(errp, "Invalid COR error");
1548         return;
1549     }
1550     /* If the error is masked, nothing to do here */
1551     if (!((1 << cxl_err_type) &
1552           ~ldl_le_p(reg_state + R_CXL_RAS_COR_ERR_MASK))) {
1553         return;
1554     }
1555 
1556     cor_err |= (1 << cxl_err_type);
1557     stl_le_p(reg_state + R_CXL_RAS_COR_ERR_STATUS, cor_err);
1558 
1559     pcie_aer_inject_error(PCI_DEVICE(obj), &err);
1560 }
1561 
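     /* Fill in the common event record header shared by all record types */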
1562 static void cxl_assign_event_header(CXLEventRecordHdr *hdr,
1563                                     const QemuUUID *uuid, uint32_t flags,
1564                                     uint8_t length, uint64_t timestamp)
1565 {
1566     st24_le_p(&hdr->flags, flags);
1567     hdr->length = length;
1568     memcpy(&hdr->id, uuid, sizeof(hdr->id));
1569     stq_le_p(&hdr->timestamp, timestamp);
1570 }
1571 
1572 static const QemuUUID gen_media_uuid = {
1573     .data = UUID(0xfbcd0a77, 0xc260, 0x417f,
1574                  0x85, 0xa9, 0x08, 0x8b, 0x16, 0x21, 0xeb, 0xa6),
1575 };
1576 
1577 static const QemuUUID dram_uuid = {
1578     .data = UUID(0x601dcbb3, 0x9c06, 0x4eab, 0xb8, 0xaf,
1579                  0x4e, 0x9b, 0xfb, 0x5c, 0x96, 0x24),
1580 };
1581 
1582 static const QemuUUID memory_module_uuid = {
1583     .data = UUID(0xfe927475, 0xdd59, 0x4339, 0xa5, 0x86,
1584                  0x79, 0xba, 0xb1, 0x13, 0xb7, 0x74),
1585 };
1586 
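     /* Validity flag bits of the General Media Event Record */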
1587 #define CXL_GMER_VALID_CHANNEL                          BIT(0)
1588 #define CXL_GMER_VALID_RANK                             BIT(1)
1589 #define CXL_GMER_VALID_DEVICE                           BIT(2)
1590 #define CXL_GMER_VALID_COMPONENT                        BIT(3)
1591 
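     /* Map a QMP CxlEventLog value to the internal CXL event log type */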
1592 static int ct3d_qmp_cxl_event_log_enc(CxlEventLog log)
1593 {
1594     switch (log) {
1595     case CXL_EVENT_LOG_INFORMATIONAL:
1596         return CXL_EVENT_TYPE_INFO;
1597     case CXL_EVENT_LOG_WARNING:
1598         return CXL_EVENT_TYPE_WARN;
1599     case CXL_EVENT_LOG_FAILURE:
1600         return CXL_EVENT_TYPE_FAIL;
1601     case CXL_EVENT_LOG_FATAL:
1602         return CXL_EVENT_TYPE_FATAL;
1603     default:
1604         return -EINVAL;
1605     }
1606 }
1607 /* Component ID is device specific.  Define this as a string. */
1608 void qmp_cxl_inject_general_media_event(const char *path, CxlEventLog log,
1609                                         uint8_t flags, uint64_t dpa,
1610                                         uint8_t descriptor, uint8_t type,
1611                                         uint8_t transaction_type,
1612                                         bool has_channel, uint8_t channel,
1613                                         bool has_rank, uint8_t rank,
1614                                         bool has_device, uint32_t device,
1615                                         const char *component_id,
1616                                         Error **errp)
1617 {
1618     Object *obj = object_resolve_path(path, NULL);
1619     CXLEventGenMedia gem;
1620     CXLEventRecordHdr *hdr = &gem.hdr;
1621     CXLDeviceState *cxlds;
1622     CXLType3Dev *ct3d;
1623     uint16_t valid_flags = 0;
1624     uint8_t enc_log;
1625     int rc;
1626 
1627     if (!obj) {
1628         error_setg(errp, "Unable to resolve path");
1629         return;
1630     }
1631     if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) {
1632         error_setg(errp, "Path does not point to a CXL type 3 device");
1633         return;
1634     }
1635     ct3d = CXL_TYPE3(obj);
1636     cxlds = &ct3d->cxl_dstate;
1637 
1638     rc = ct3d_qmp_cxl_event_log_enc(log);
1639     if (rc < 0) {
1640         error_setg(errp, "Unhandled error log type");
1641         return;
1642     }
1643     enc_log = rc;
1644 
1645     memset(&gem, 0, sizeof(gem));
1646     cxl_assign_event_header(hdr, &gen_media_uuid, flags, sizeof(gem),
1647                             cxl_device_get_timestamp(&ct3d->cxl_dstate));
1648 
1649     stq_le_p(&gem.phys_addr, dpa);
1650     gem.descriptor = descriptor;
1651     gem.type = type;
1652     gem.transaction_type = transaction_type;
1653 
1654     if (has_channel) {
1655         gem.channel = channel;
1656         valid_flags |= CXL_GMER_VALID_CHANNEL;
1657     }
1658 
1659     if (has_rank) {
1660         gem.rank = rank;
1661         valid_flags |= CXL_GMER_VALID_RANK;
1662     }
1663 
1664     if (has_device) {
1665         st24_le_p(gem.device, device);
1666         valid_flags |= CXL_GMER_VALID_DEVICE;
1667     }
1668 
1669     if (component_id) {
1670         strncpy((char *)gem.component_id, component_id,
1671                 sizeof(gem.component_id) - 1);
1672         valid_flags |= CXL_GMER_VALID_COMPONENT;
1673     }
1674 
1675     stw_le_p(&gem.validity_flags, valid_flags);
1676 
1677     if (cxl_event_insert(cxlds, enc_log, (CXLEventRecordRaw *)&gem)) {
1678         cxl_event_irq_assert(ct3d);
1679     }
1680 }
1681 
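     /* Validity flag bits of the DRAM Event Record */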
1682 #define CXL_DRAM_VALID_CHANNEL                          BIT(0)
1683 #define CXL_DRAM_VALID_RANK                             BIT(1)
1684 #define CXL_DRAM_VALID_NIBBLE_MASK                      BIT(2)
1685 #define CXL_DRAM_VALID_BANK_GROUP                       BIT(3)
1686 #define CXL_DRAM_VALID_BANK                             BIT(4)
1687 #define CXL_DRAM_VALID_ROW                              BIT(5)
1688 #define CXL_DRAM_VALID_COLUMN                           BIT(6)
1689 #define CXL_DRAM_VALID_CORRECTION_MASK                  BIT(7)
1690 
1691 void qmp_cxl_inject_dram_event(const char *path, CxlEventLog log, uint8_t flags,
1692                                uint64_t dpa, uint8_t descriptor,
1693                                uint8_t type, uint8_t transaction_type,
1694                                bool has_channel, uint8_t channel,
1695                                bool has_rank, uint8_t rank,
1696                                bool has_nibble_mask, uint32_t nibble_mask,
1697                                bool has_bank_group, uint8_t bank_group,
1698                                bool has_bank, uint8_t bank,
1699                                bool has_row, uint32_t row,
1700                                bool has_column, uint16_t column,
1701                                bool has_correction_mask,
1702                                uint64List *correction_mask,
1703                                Error **errp)
1704 {
1705     Object *obj = object_resolve_path(path, NULL);
1706     CXLEventDram dram;
1707     CXLEventRecordHdr *hdr = &dram.hdr;
1708     CXLDeviceState *cxlds;
1709     CXLType3Dev *ct3d;
1710     uint16_t valid_flags = 0;
1711     uint8_t enc_log;
1712     int rc;
1713 
1714     if (!obj) {
1715         error_setg(errp, "Unable to resolve path");
1716         return;
1717     }
1718     if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) {
1719         error_setg(errp, "Path does not point to a CXL type 3 device");
1720         return;
1721     }
1722     ct3d = CXL_TYPE3(obj);
1723     cxlds = &ct3d->cxl_dstate;
1724 
1725     rc = ct3d_qmp_cxl_event_log_enc(log);
1726     if (rc < 0) {
1727         error_setg(errp, "Unhandled error log type");
1728         return;
1729     }
1730     enc_log = rc;
1731 
1732     memset(&dram, 0, sizeof(dram));
1733     cxl_assign_event_header(hdr, &dram_uuid, flags, sizeof(dram),
1734                             cxl_device_get_timestamp(&ct3d->cxl_dstate));
1735     stq_le_p(&dram.phys_addr, dpa);
1736     dram.descriptor = descriptor;
1737     dram.type = type;
1738     dram.transaction_type = transaction_type;
1739 
1740     if (has_channel) {
1741         dram.channel = channel;
1742         valid_flags |= CXL_DRAM_VALID_CHANNEL;
1743     }
1744 
1745     if (has_rank) {
1746         dram.rank = rank;
1747         valid_flags |= CXL_DRAM_VALID_RANK;
1748     }
1749 
1750     if (has_nibble_mask) {
1751         st24_le_p(dram.nibble_mask, nibble_mask);
1752         valid_flags |= CXL_DRAM_VALID_NIBBLE_MASK;
1753     }
1754 
1755     if (has_bank_group) {
1756         dram.bank_group = bank_group;
1757         valid_flags |= CXL_DRAM_VALID_BANK_GROUP;
1758     }
1759 
1760     if (has_bank) {
1761         dram.bank = bank;
1762         valid_flags |= CXL_DRAM_VALID_BANK;
1763     }
1764 
1765     if (has_row) {
1766         st24_le_p(dram.row, row);
1767         valid_flags |= CXL_DRAM_VALID_ROW;
1768     }
1769 
1770     if (has_column) {
1771         stw_le_p(&dram.column, column);
1772         valid_flags |= CXL_DRAM_VALID_COLUMN;
1773     }
1774 
1775     if (has_correction_mask) {
1776         int count = 0;
1777         while (correction_mask && count < 4) {
1778             stq_le_p(&dram.correction_mask[count],
1779                      correction_mask->value);
1780             count++;
1781             correction_mask = correction_mask->next;
1782         }
1783         valid_flags |= CXL_DRAM_VALID_CORRECTION_MASK;
1784     }
1785 
1786     stw_le_p(&dram.validity_flags, valid_flags);
1787 
1788     if (cxl_event_insert(cxlds, enc_log, (CXLEventRecordRaw *)&dram)) {
1789         cxl_event_irq_assert(ct3d);
1790     }
1791     return;
1792 }
1793 
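     /* Inject a Memory Module Event Record carrying device health information */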
1794 void qmp_cxl_inject_memory_module_event(const char *path, CxlEventLog log,
1795                                         uint8_t flags, uint8_t type,
1796                                         uint8_t health_status,
1797                                         uint8_t media_status,
1798                                         uint8_t additional_status,
1799                                         uint8_t life_used,
1800                                         int16_t temperature,
1801                                         uint32_t dirty_shutdown_count,
1802                                         uint32_t corrected_volatile_error_count,
1803                                         uint32_t corrected_persist_error_count,
1804                                         Error **errp)
1805 {
1806     Object *obj = object_resolve_path(path, NULL);
1807     CXLEventMemoryModule module;
1808     CXLEventRecordHdr *hdr = &module.hdr;
1809     CXLDeviceState *cxlds;
1810     CXLType3Dev *ct3d;
1811     uint8_t enc_log;
1812     int rc;
1813 
1814     if (!obj) {
1815         error_setg(errp, "Unable to resolve path");
1816         return;
1817     }
1818     if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) {
1819         error_setg(errp, "Path does not point to a CXL type 3 device");
1820         return;
1821     }
1822     ct3d = CXL_TYPE3(obj);
1823     cxlds = &ct3d->cxl_dstate;
1824 
1825     rc = ct3d_qmp_cxl_event_log_enc(log);
1826     if (rc < 0) {
1827         error_setg(errp, "Unhandled error log type");
1828         return;
1829     }
1830     enc_log = rc;
1831 
1832     memset(&module, 0, sizeof(module));
1833     cxl_assign_event_header(hdr, &memory_module_uuid, flags, sizeof(module),
1834                             cxl_device_get_timestamp(&ct3d->cxl_dstate));
1835 
1836     module.type = type;
1837     module.health_status = health_status;
1838     module.media_status = media_status;
1839     module.additional_status = additional_status;
1840     module.life_used = life_used;
1841     stw_le_p(&module.temperature, temperature);
1842     stl_le_p(&module.dirty_shutdown_count, dirty_shutdown_count);
1843     stl_le_p(&module.corrected_volatile_error_count,
1844              corrected_volatile_error_count);
1845     stl_le_p(&module.corrected_persistent_error_count,
1846              corrected_persist_error_count);
1847 
1848     if (cxl_event_insert(cxlds, enc_log, (CXLEventRecordRaw *)&module)) {
1849         cxl_event_irq_assert(ct3d);
1850     }
1851 }
1852 
1853 /* CXL r3.1 Table 8-50: Dynamic Capacity Event Record */
1854 static const QemuUUID dynamic_capacity_uuid = {
1855     .data = UUID(0xca95afa7, 0xf183, 0x4018, 0x8c, 0x2f,
1856                  0x95, 0x26, 0x8e, 0x10, 0x1a, 0x2a),
1857 };
1858 
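     /* Event Type field values for the Dynamic Capacity Event Record */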
1859 typedef enum CXLDCEventType {
1860     DC_EVENT_ADD_CAPACITY = 0x0,
1861     DC_EVENT_RELEASE_CAPACITY = 0x1,
1862     DC_EVENT_FORCED_RELEASE_CAPACITY = 0x2,
1863     DC_EVENT_REGION_CONFIG_UPDATED = 0x3,
1864     DC_EVENT_ADD_CAPACITY_RSP = 0x4,
1865     DC_EVENT_CAPACITY_RELEASED = 0x5,
1866 } CXLDCEventType;
1867 
1868 /*
1869  * Check whether the range [dpa, dpa + len - 1] overlaps with any extent in
1870  * the list.
1871  * Return value: true if there is an overlap; otherwise, false.
1872  */
1873 static bool cxl_extents_overlaps_dpa_range(CXLDCExtentList *list,
1874                                            uint64_t dpa, uint64_t len)
1875 {
1876     CXLDCExtent *ent;
1877     Range range1, range2;
1878 
1879     if (!list) {
1880         return false;
1881     }
1882 
1883     range_init_nofail(&range1, dpa, len);
1884     QTAILQ_FOREACH(ent, list, node) {
1885         range_init_nofail(&range2, ent->start_dpa, ent->len);
1886         if (range_overlaps_range(&range1, &range2)) {
1887             return true;
1888         }
1889     }
1890     return false;
1891 }
1892 
1893 /*
1894  * Check whether the range [dpa, dpa + len - 1] is fully contained by a
1895  * single extent in the list. Containment across multiple extents will be
1896  * checked once superset release is added.
1897  * Return value: true if the range is contained; otherwise, false.
1898  */
1899 bool cxl_extents_contains_dpa_range(CXLDCExtentList *list,
1900                                     uint64_t dpa, uint64_t len)
1901 {
1902     CXLDCExtent *ent;
1903     Range range1, range2;
1904 
1905     if (!list) {
1906         return false;
1907     }
1908 
1909     range_init_nofail(&range1, dpa, len);
1910     QTAILQ_FOREACH(ent, list, node) {
1911         range_init_nofail(&range2, ent->start_dpa, ent->len);
1912         if (range_contains_range(&range2, &range1)) {
1913             return true;
1914         }
1915     }
1916     return false;
1917 }
1918 
1919 static bool cxl_extent_groups_overlaps_dpa_range(CXLDCExtentGroupList *list,
1920                                                  uint64_t dpa, uint64_t len)
1921 {
1922     CXLDCExtentGroup *group;
1923 
1924     if (!list) {
1925         return false;
1926     }
1927 
1928     QTAILQ_FOREACH(group, list, node) {
1929         if (cxl_extents_overlaps_dpa_range(&group->list, dpa, len)) {
1930             return true;
1931         }
1932     }
1933     return false;
1934 }
1935 
1936 /*
1937  * Main function for processing a dynamic capacity event with an extent list.
1938  * Currently only DC extent add and release requests are handled.
1939  */
1940 static void qmp_cxl_process_dynamic_capacity_prescriptive(const char *path,
1941         uint16_t hid, CXLDCEventType type, uint8_t rid,
1942         CxlDynamicCapacityExtentList *records, Error **errp)
1943 {
1944     Object *obj;
1945     CXLEventDynamicCapacity dCap = {};
1946     CXLEventRecordHdr *hdr = &dCap.hdr;
1947     CXLType3Dev *dcd;
1948     uint8_t flags = 1 << CXL_EVENT_TYPE_INFO;
1949     uint32_t num_extents = 0;
1950     CxlDynamicCapacityExtentList *list;
1951     CXLDCExtentGroup *group = NULL;
1952     g_autofree CXLDCExtentRaw *extents = NULL;
1953     uint8_t enc_log = CXL_EVENT_TYPE_DYNAMIC_CAP;
1954     uint64_t dpa, offset, len, block_size;
1955     g_autofree unsigned long *blk_bitmap = NULL;
1956     int i;
1957 
1958     obj = object_resolve_path_type(path, TYPE_CXL_TYPE3, NULL);
1959     if (!obj) {
1960         error_setg(errp, "Unable to resolve CXL type 3 device");
1961         return;
1962     }
1963 
1964     dcd = CXL_TYPE3(obj);
1965     if (!dcd->dc.num_regions) {
1966         error_setg(errp, "No dynamic capacity support from the device");
1967         return;
1968     }
1969 
1970 
1971     if (rid >= dcd->dc.num_regions) {
1972         error_setg(errp, "region id is too large");
1973         return;
1974     }
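         /*
          * Track which blocks of the region the submitted extents cover so
          * that duplicate or overlapping extents in this request are caught.
          */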
1975     block_size = dcd->dc.regions[rid].block_size;
1976     blk_bitmap = bitmap_new(dcd->dc.regions[rid].len / block_size);
1977 
1978     /* Sanity check and count the extents */
1979     list = records;
1980     while (list) {
1981         offset = list->value->offset;
1982         len = list->value->len;
1983         dpa = offset + dcd->dc.regions[rid].base;
1984 
1985         if (len == 0) {
1986             error_setg(errp, "extent with 0 length is not allowed");
1987             return;
1988         }
1989 
1990         if (offset % block_size || len % block_size) {
1991             error_setg(errp, "offset or len is not aligned to region block size");
1992             return;
1993         }
1994 
1995         if (offset + len > dcd->dc.regions[rid].len) {
1996             error_setg(errp, "extent range is beyond the region end");
1997             return;
1998         }
1999 
2000         /* No duplicate or overlapped extents are allowed */
2001         if (test_any_bits_set(blk_bitmap, offset / block_size,
2002                               len / block_size)) {
2003             error_setg(errp, "duplicate or overlapping extents detected");
2004             return;
2005         }
2006         bitmap_set(blk_bitmap, offset / block_size, len / block_size);
2007 
2008         if (type == DC_EVENT_RELEASE_CAPACITY) {
2009             if (cxl_extent_groups_overlaps_dpa_range(&dcd->dc.extents_pending,
2010                                                      dpa, len)) {
2011                 error_setg(errp,
2012                            "cannot release extent with pending DPA range");
2013                 return;
2014             }
2015             if (!ct3_test_region_block_backed(dcd, dpa, len)) {
2016                 error_setg(errp,
2017                            "cannot release extent with nonexistent DPA range");
2018                 return;
2019             }
2020         } else if (type == DC_EVENT_ADD_CAPACITY) {
2021             if (cxl_extents_overlaps_dpa_range(&dcd->dc.extents, dpa, len)) {
2022                 error_setg(errp,
2023                            "cannot add DPA already accessible to the same LD");
2024                 return;
2025             }
2026             if (cxl_extent_groups_overlaps_dpa_range(&dcd->dc.extents_pending,
2027                                                      dpa, len)) {
2028                 error_setg(errp,
2029                            "cannot add DPA again while still pending");
2030                 return;
2031             }
2032         }
2033         list = list->next;
2034         num_extents++;
2035     }
2036 
2037     /* Create extent list for event being passed to host */
2038     i = 0;
2039     list = records;
2040     extents = g_new0(CXLDCExtentRaw, num_extents);
2041     while (list) {
2042         offset = list->value->offset;
2043         len = list->value->len;
2044         dpa = dcd->dc.regions[rid].base + offset;
2045 
2046         extents[i].start_dpa = dpa;
2047         extents[i].len = len;
2048         memset(extents[i].tag, 0, 0x10);
2049         extents[i].shared_seq = 0;
2050         if (type == DC_EVENT_ADD_CAPACITY) {
2051             group = cxl_insert_extent_to_extent_group(group,
2052                                                       extents[i].start_dpa,
2053                                                       extents[i].len,
2054                                                       extents[i].tag,
2055                                                       extents[i].shared_seq);
2056         }
2057 
2058         list = list->next;
2059         i++;
2060     }
2061     if (group) {
2062         cxl_extent_group_list_insert_tail(&dcd->dc.extents_pending, group);
2063     }
2064 
2065     /*
2066      * CXL r3.1 section 8.2.9.2.1.6: Dynamic Capacity Event Record
2067      *
2068      * All Dynamic Capacity event records shall set the Event Record Severity
2069      * field in the Common Event Record Format to Informational Event. All
2070      * Dynamic Capacity related events shall be logged in the Dynamic Capacity
2071      * Event Log.
2072      */
2073     cxl_assign_event_header(hdr, &dynamic_capacity_uuid, flags, sizeof(dCap),
2074                             cxl_device_get_timestamp(&dcd->cxl_dstate));
2075 
2076     dCap.type = type;
2077     /* FIXME: for now, validity flag is cleared */
2078     dCap.validity_flags = 0;
2079     stw_le_p(&dCap.host_id, hid);
2080     /* only valid for DC_REGION_CONFIG_UPDATED event */
2081     dCap.updated_region_id = 0;
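         /* Emit one record per extent; all but the last set the More flag */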
2082     for (i = 0; i < num_extents; i++) {
2083         memcpy(&dCap.dynamic_capacity_extent, &extents[i],
2084                sizeof(CXLDCExtentRaw));
2085 
2086         dCap.flags = 0;
2087         if (i < num_extents - 1) {
2088             /* Set "More" flag */
2089             dCap.flags |= BIT(0);
2090         }
2091 
2092         if (cxl_event_insert(&dcd->cxl_dstate, enc_log,
2093                              (CXLEventRecordRaw *)&dCap)) {
2094             cxl_event_irq_assert(dcd);
2095         }
2096     }
2097 }
2098 
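     /*
      * QMP interface for adding dynamic capacity extents. Only the
      * prescriptive extent selection policy is currently supported.
      */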
2099 void qmp_cxl_add_dynamic_capacity(const char *path, uint16_t host_id,
2100                                   CxlExtentSelectionPolicy sel_policy,
2101                                   uint8_t region, const char *tag,
2102                                   CxlDynamicCapacityExtentList *extents,
2103                                   Error **errp)
2104 {
2105     switch (sel_policy) {
2106     case CXL_EXTENT_SELECTION_POLICY_PRESCRIPTIVE:
2107         qmp_cxl_process_dynamic_capacity_prescriptive(path, host_id,
2108                                                       DC_EVENT_ADD_CAPACITY,
2109                                                       region, extents, errp);
2110         return;
2111     default:
2112         error_setg(errp, "Selection policy not supported");
2113         return;
2114     }
2115 }
2116 
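     /*
      * QMP interface for releasing dynamic capacity extents. Forced removal
      * is not supported yet and only the prescriptive removal policy is
      * handled.
      */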
2117 void qmp_cxl_release_dynamic_capacity(const char *path, uint16_t host_id,
2118                                       CxlExtentRemovalPolicy removal_policy,
2119                                       bool has_forced_removal,
2120                                       bool forced_removal,
2121                                       bool has_sanitize_on_release,
2122                                       bool sanitize_on_release,
2123                                       uint8_t region,
2124                                       const char *tag,
2125                                       CxlDynamicCapacityExtentList *extents,
2126                                       Error **errp)
2127 {
2128     CXLDCEventType type = DC_EVENT_RELEASE_CAPACITY;
2129 
2130     if (has_forced_removal && forced_removal) {
2131         /* TODO: enable forced removal in the future */
2132         type = DC_EVENT_FORCED_RELEASE_CAPACITY;
2133         error_setg(errp, "Forced removal not supported yet");
2134         return;
2135     }
2136 
2137     switch (removal_policy) {
2138     case CXL_EXTENT_REMOVAL_POLICY_PRESCRIPTIVE:
2139         qmp_cxl_process_dynamic_capacity_prescriptive(path, host_id, type,
2140                                                       region, extents, errp);
2141         return;
2142     default:
2143         error_setg(errp, "Removal policy not supported");
2144         return;
2145     }
2146 }
2147 
2148 static void ct3_class_init(ObjectClass *oc, void *data)
2149 {
2150     DeviceClass *dc = DEVICE_CLASS(oc);
2151     PCIDeviceClass *pc = PCI_DEVICE_CLASS(oc);
2152     CXLType3Class *cvc = CXL_TYPE3_CLASS(oc);
2153 
2154     pc->realize = ct3_realize;
2155     pc->exit = ct3_exit;
2156     pc->class_id = PCI_CLASS_MEMORY_CXL;
2157     pc->vendor_id = PCI_VENDOR_ID_INTEL;
2158     pc->device_id = 0xd93; /* LVF for now */
2159     pc->revision = 1;
2160 
2161     pc->config_write = ct3d_config_write;
2162     pc->config_read = ct3d_config_read;
2163 
2164     set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
2165     dc->desc = "CXL Memory Device (Type 3)";
2166     device_class_set_legacy_reset(dc, ct3d_reset);
2167     device_class_set_props(dc, ct3_props);
2168 
2169     cvc->get_lsa_size = get_lsa_size;
2170     cvc->get_lsa = get_lsa;
2171     cvc->set_lsa = set_lsa;
2172     cvc->set_cacheline = set_cacheline;
2173 }
2174 
2175 static const TypeInfo ct3d_info = {
2176     .name = TYPE_CXL_TYPE3,
2177     .parent = TYPE_PCI_DEVICE,
2178     .class_size = sizeof(struct CXLType3Class),
2179     .class_init = ct3_class_init,
2180     .instance_size = sizeof(CXLType3Dev),
2181     .interfaces = (InterfaceInfo[]) {
2182         { INTERFACE_CXL_DEVICE },
2183         { INTERFACE_PCIE_DEVICE },
2184         {}
2185     },
2186 };
2187 
2188 static void ct3d_registers(void)
2189 {
2190     type_register_static(&ct3d_info);
2191 }
2192 
2193 type_init(ct3d_registers);
2194