xref: /qemu/hw/intc/xive.c (revision 513823e7521a09ed7ad1e32e6454bac3b2cbf52d)
1 /*
2  * QEMU PowerPC XIVE interrupt controller model
3  *
4  * Copyright (c) 2017-2018, IBM Corporation.
5  *
6  * This code is licensed under the GPL version 2 or later. See the
7  * COPYING file in the top-level directory.
8  */
9 
10 #include "qemu/osdep.h"
11 #include "qemu/log.h"
12 #include "qemu/module.h"
13 #include "qapi/error.h"
14 #include "target/ppc/cpu.h"
15 #include "system/cpus.h"
16 #include "system/dma.h"
17 #include "system/reset.h"
18 #include "hw/qdev-properties.h"
19 #include "migration/vmstate.h"
20 #include "hw/irq.h"
21 #include "hw/ppc/xive.h"
22 #include "hw/ppc/xive2.h"
23 #include "hw/ppc/xive_regs.h"
24 #include "trace.h"
25 
26 /*
27  * XIVE Thread Interrupt Management context
28  */
29 
/*
 * Derive the Pending Interrupt Priority Register (PIPR) value from an
 * Interrupt Pending Buffer (IPB) value. The result is the number of
 * the most favored pending priority, or 0xff when no bit is set.
 */
static uint8_t ipb_to_pipr(uint8_t ipb)
{
    if (!ipb) {
        return 0xff;
    }

    /* The most significant IPB bit maps to priority 0 */
    return clz32((uint32_t)ipb << 24);
}
39 
40 static uint8_t exception_mask(uint8_t ring)
41 {
42     switch (ring) {
43     case TM_QW1_OS:
44         return TM_QW1_NSR_EO;
45     case TM_QW3_HV_PHYS:
46         return TM_QW3_NSR_HE;
47     default:
48         g_assert_not_reached();
49     }
50 }
51 
52 static qemu_irq xive_tctx_output(XiveTCTX *tctx, uint8_t ring)
53 {
54         switch (ring) {
55         case TM_QW0_USER:
56                 return 0; /* Not supported */
57         case TM_QW1_OS:
58                 return tctx->os_output;
59         case TM_QW2_HV_POOL:
60         case TM_QW3_HV_PHYS:
61                 return tctx->hv_output;
62         default:
63                 return 0;
64         }
65 }
66 
67 static uint64_t xive_tctx_accept(XiveTCTX *tctx, uint8_t ring)
68 {
69     uint8_t *regs = &tctx->regs[ring];
70     uint8_t nsr = regs[TM_NSR];
71     uint8_t mask = exception_mask(ring);
72 
73     qemu_irq_lower(xive_tctx_output(tctx, ring));
74 
75     if (regs[TM_NSR] & mask) {
76         uint8_t cppr = regs[TM_PIPR];
77         uint8_t alt_ring;
78         uint8_t *alt_regs;
79 
80         /* POOL interrupt uses IPB in QW2, POOL ring */
81         if ((ring == TM_QW3_HV_PHYS) && (nsr & (TM_QW3_NSR_HE_POOL << 6))) {
82             alt_ring = TM_QW2_HV_POOL;
83         } else {
84             alt_ring = ring;
85         }
86         alt_regs = &tctx->regs[alt_ring];
87 
88         regs[TM_CPPR] = cppr;
89 
90         /* Reset the pending buffer bit */
91         alt_regs[TM_IPB] &= ~xive_priority_to_ipb(cppr);
92 
93         /* Drop Exception bit */
94         regs[TM_NSR] &= ~mask;
95 
96         trace_xive_tctx_accept(tctx->cs->cpu_index, alt_ring,
97                                alt_regs[TM_IPB], regs[TM_PIPR],
98                                regs[TM_CPPR], regs[TM_NSR]);
99     }
100 
101     return ((uint64_t)nsr << 8) | regs[TM_CPPR];
102 }
103 
104 static void xive_tctx_notify(XiveTCTX *tctx, uint8_t ring)
105 {
106     /* HV_POOL ring uses HV_PHYS NSR, CPPR and PIPR registers */
107     uint8_t alt_ring = (ring == TM_QW2_HV_POOL) ? TM_QW3_HV_PHYS : ring;
108     uint8_t *alt_regs = &tctx->regs[alt_ring];
109     uint8_t *regs = &tctx->regs[ring];
110 
111     if (alt_regs[TM_PIPR] < alt_regs[TM_CPPR]) {
112         switch (ring) {
113         case TM_QW1_OS:
114             regs[TM_NSR] |= TM_QW1_NSR_EO;
115             break;
116         case TM_QW2_HV_POOL:
117             alt_regs[TM_NSR] = (TM_QW3_NSR_HE_POOL << 6);
118             break;
119         case TM_QW3_HV_PHYS:
120             regs[TM_NSR] |= (TM_QW3_NSR_HE_PHYS << 6);
121             break;
122         default:
123             g_assert_not_reached();
124         }
125         trace_xive_tctx_notify(tctx->cs->cpu_index, ring,
126                                regs[TM_IPB], alt_regs[TM_PIPR],
127                                alt_regs[TM_CPPR], alt_regs[TM_NSR]);
128         qemu_irq_raise(xive_tctx_output(tctx, ring));
129     }
130 }
131 
132 void xive_tctx_reset_signal(XiveTCTX *tctx, uint8_t ring)
133 {
134     /*
135      * Lower the External interrupt. Used when pulling a context. It is
136      * necessary to avoid catching it in the higher privilege context. It
137      * should be raised again when re-pushing the lower privilege context.
138      */
139     qemu_irq_lower(xive_tctx_output(tctx, ring));
140 }
141 
142 static void xive_tctx_set_cppr(XiveTCTX *tctx, uint8_t ring, uint8_t cppr)
143 {
144     uint8_t *regs = &tctx->regs[ring];
145     uint8_t pipr_min;
146     uint8_t ring_min;
147 
148     trace_xive_tctx_set_cppr(tctx->cs->cpu_index, ring,
149                              regs[TM_IPB], regs[TM_PIPR],
150                              cppr, regs[TM_NSR]);
151 
152     if (cppr > XIVE_PRIORITY_MAX) {
153         cppr = 0xff;
154     }
155 
156     tctx->regs[ring + TM_CPPR] = cppr;
157 
158     /*
159      * Recompute the PIPR based on local pending interrupts.  The PHYS
160      * ring must take the minimum of both the PHYS and POOL PIPR values.
161      */
162     pipr_min = ipb_to_pipr(regs[TM_IPB]);
163     ring_min = ring;
164 
165     /* PHYS updates also depend on POOL values */
166     if (ring == TM_QW3_HV_PHYS) {
167         uint8_t *pool_regs = &tctx->regs[TM_QW2_HV_POOL];
168 
169         /* POOL values only matter if POOL ctx is valid */
170         if (pool_regs[TM_WORD2] & 0x80) {
171 
172             uint8_t pool_pipr = ipb_to_pipr(pool_regs[TM_IPB]);
173 
174             /*
175              * Determine highest priority interrupt and
176              * remember which ring has it.
177              */
178             if (pool_pipr < pipr_min) {
179                 pipr_min = pool_pipr;
180                 ring_min = TM_QW2_HV_POOL;
181             }
182         }
183     }
184 
185     regs[TM_PIPR] = pipr_min;
186 
187     /* CPPR has changed, check if we need to raise a pending exception */
188     xive_tctx_notify(tctx, ring_min);
189 }
190 
191 void xive_tctx_ipb_update(XiveTCTX *tctx, uint8_t ring, uint8_t ipb)
192 {
193     uint8_t *regs = &tctx->regs[ring];
194 
195     regs[TM_IPB] |= ipb;
196     regs[TM_PIPR] = ipb_to_pipr(regs[TM_IPB]);
197     xive_tctx_notify(tctx, ring);
198 }
199 
200 /*
201  * XIVE Thread Interrupt Management Area (TIMA)
202  */
203 
204 static void xive_tm_set_hv_cppr(XivePresenter *xptr, XiveTCTX *tctx,
205                                 hwaddr offset, uint64_t value, unsigned size)
206 {
207     xive_tctx_set_cppr(tctx, TM_QW3_HV_PHYS, value & 0xff);
208 }
209 
210 static uint64_t xive_tm_ack_hv_reg(XivePresenter *xptr, XiveTCTX *tctx,
211                                    hwaddr offset, unsigned size)
212 {
213     return xive_tctx_accept(tctx, TM_QW3_HV_PHYS);
214 }
215 
216 static uint64_t xive_tm_pull_pool_ctx(XivePresenter *xptr, XiveTCTX *tctx,
217                                       hwaddr offset, unsigned size)
218 {
219     uint32_t qw2w2_prev = xive_tctx_word2(&tctx->regs[TM_QW2_HV_POOL]);
220     uint32_t qw2w2;
221 
222     qw2w2 = xive_set_field32(TM_QW2W2_VP, qw2w2_prev, 0);
223     memcpy(&tctx->regs[TM_QW2_HV_POOL + TM_WORD2], &qw2w2, 4);
224     return qw2w2;
225 }
226 
227 static uint64_t xive_tm_pull_phys_ctx(XivePresenter *xptr, XiveTCTX *tctx,
228                                       hwaddr offset, unsigned size)
229 {
230     uint8_t qw3b8_prev = tctx->regs[TM_QW3_HV_PHYS + TM_WORD2];
231     uint8_t qw3b8;
232 
233     qw3b8 = qw3b8_prev & ~TM_QW3B8_VT;
234     tctx->regs[TM_QW3_HV_PHYS + TM_WORD2] = qw3b8;
235     return qw3b8;
236 }
237 
238 static void xive_tm_vt_push(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset,
239                             uint64_t value, unsigned size)
240 {
241     tctx->regs[TM_QW3_HV_PHYS + TM_WORD2] = value & 0xff;
242 }
243 
244 static uint64_t xive_tm_vt_poll(XivePresenter *xptr, XiveTCTX *tctx,
245                                 hwaddr offset, unsigned size)
246 {
247     return tctx->regs[TM_QW3_HV_PHYS + TM_WORD2] & 0xff;
248 }
249 
/*
 * Define an access map for each page of the TIMA that we will use in
 * the memory region ops to filter values when doing loads and stores
 * of raw register values
 *
 * Register accessibility bits:
 *
 *    0x0 - no access
 *    0x1 - write only
 *    0x2 - read only
 *    0x3 - read/write
 */
262 
/* Per-byte access bits of the HW page, one 16-byte ring per group */
static const uint8_t xive_tm_hw_view[] = {
    /* QW-0 User */
    3, 0, 0, 0,  0, 0, 0, 0,
    3, 3, 3, 3,  0, 0, 0, 0,
    /* QW-1 OS */
    3, 3, 3, 3,  3, 3, 0, 2,
    3, 3, 3, 3,  0, 0, 0, 0,
    /* QW-2 POOL */
    0, 0, 3, 3,  0, 3, 3, 0,
    3, 3, 3, 3,  0, 0, 0, 0,
    /* QW-3 PHYS */
    3, 3, 3, 3,  0, 3, 0, 2,
    3, 0, 0, 3,  3, 3, 3, 0,
};
269 
/* Per-byte access bits of the HV page, one 16-byte ring per group */
static const uint8_t xive_tm_hv_view[] = {
    /* QW-0 User */
    3, 0, 0, 0,  0, 0, 0, 0,
    3, 3, 3, 3,  0, 0, 0, 0,
    /* QW-1 OS */
    3, 3, 3, 3,  3, 3, 0, 2,
    3, 3, 3, 3,  0, 0, 0, 0,
    /* QW-2 POOL */
    0, 0, 3, 3,  0, 3, 3, 0,
    0, 3, 3, 3,  0, 0, 0, 0,
    /* QW-3 PHYS */
    3, 3, 3, 3,  0, 3, 0, 2,
    3, 0, 0, 3,  0, 0, 0, 0,
};
276 
/* Per-byte access bits of the OS page, one 16-byte ring per group */
static const uint8_t xive_tm_os_view[] = {
    /* QW-0 User */
    3, 0, 0, 0,  0, 0, 0, 0,
    3, 3, 3, 3,  0, 0, 0, 0,
    /* QW-1 OS */
    2, 3, 2, 2,  2, 2, 0, 2,
    0, 0, 0, 0,  0, 0, 0, 0,
    /* QW-2 POOL */
    0, 0, 0, 0,  0, 0, 0, 0,
    0, 0, 0, 0,  0, 0, 0, 0,
    /* QW-3 PHYS */
    0, 0, 0, 0,  0, 0, 0, 0,
    0, 0, 0, 0,  0, 0, 0, 0,
};
283 
/* Per-byte access bits of the User page, one 16-byte ring per group */
static const uint8_t xive_tm_user_view[] = {
    /* QW-0 User */
    3, 0, 0, 0,  0, 0, 0, 0,
    0, 0, 0, 0,  0, 0, 0, 0,
    /* QW-1 OS */
    0, 0, 0, 0,  0, 0, 0, 0,
    0, 0, 0, 0,  0, 0, 0, 0,
    /* QW-2 POOL */
    0, 0, 0, 0,  0, 0, 0, 0,
    0, 0, 0, 0,  0, 0, 0, 0,
    /* QW-3 PHYS */
    0, 0, 0, 0,  0, 0, 0, 0,
    0, 0, 0, 0,  0, 0, 0, 0,
};
290 
291 /*
292  * Overall TIMA access map for the thread interrupt management context
293  * registers
294  */
295 static const uint8_t *xive_tm_views[] = {
296     [XIVE_TM_HW_PAGE]   = xive_tm_hw_view,
297     [XIVE_TM_HV_PAGE]   = xive_tm_hv_view,
298     [XIVE_TM_OS_PAGE]   = xive_tm_os_view,
299     [XIVE_TM_USER_PAGE] = xive_tm_user_view,
300 };
301 
302 /*
303  * Computes a register access mask for a given offset in the TIMA
304  */
305 static uint64_t xive_tm_mask(hwaddr offset, unsigned size, bool write)
306 {
307     uint8_t page_offset = (offset >> TM_SHIFT) & 0x3;
308     uint8_t reg_offset = offset & TM_REG_OFFSET;
309     uint8_t reg_mask = write ? 0x1 : 0x2;
310     uint64_t mask = 0x0;
311     int i;
312 
313     for (i = 0; i < size; i++) {
314         if (xive_tm_views[page_offset][reg_offset + i] & reg_mask) {
315             mask |= (uint64_t) 0xff << (8 * (size - i - 1));
316         }
317     }
318 
319     return mask;
320 }
321 
322 static void xive_tm_raw_write(XiveTCTX *tctx, hwaddr offset, uint64_t value,
323                               unsigned size)
324 {
325     uint8_t ring_offset = offset & TM_RING_OFFSET;
326     uint8_t reg_offset = offset & TM_REG_OFFSET;
327     uint64_t mask = xive_tm_mask(offset, size, true);
328     int i;
329 
330     /*
331      * Only 4 or 8 bytes stores are allowed and the User ring is
332      * excluded
333      */
334     if (size < 4 || !mask || ring_offset == TM_QW0_USER) {
335         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid write access at TIMA @%"
336                       HWADDR_PRIx"\n", offset);
337         return;
338     }
339 
340     /*
341      * Use the register offset for the raw values and filter out
342      * reserved values
343      */
344     for (i = 0; i < size; i++) {
345         uint8_t byte_mask = (mask >> (8 * (size - i - 1)));
346         if (byte_mask) {
347             tctx->regs[reg_offset + i] = (value >> (8 * (size - i - 1))) &
348                 byte_mask;
349         }
350     }
351 }
352 
353 static uint64_t xive_tm_raw_read(XiveTCTX *tctx, hwaddr offset, unsigned size)
354 {
355     uint8_t ring_offset = offset & TM_RING_OFFSET;
356     uint8_t reg_offset = offset & TM_REG_OFFSET;
357     uint64_t mask = xive_tm_mask(offset, size, false);
358     uint64_t ret;
359     int i;
360 
361     /*
362      * Only 4 or 8 bytes loads are allowed and the User ring is
363      * excluded
364      */
365     if (size < 4 || !mask || ring_offset == TM_QW0_USER) {
366         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid read access at TIMA @%"
367                       HWADDR_PRIx"\n", offset);
368         return -1;
369     }
370 
371     /* Use the register offset for the raw values */
372     ret = 0;
373     for (i = 0; i < size; i++) {
374         ret |= (uint64_t) tctx->regs[reg_offset + i] << (8 * (size - i - 1));
375     }
376 
377     /* filter out reserved values */
378     return ret & mask;
379 }
380 
381 /*
382  * The TM context is mapped twice within each page. Stores and loads
383  * to the first mapping below 2K write and read the specified values
384  * without modification. The second mapping above 2K performs specific
385  * state changes (side effects) in addition to setting/returning the
386  * interrupt management area context of the processor thread.
387  */
388 static uint64_t xive_tm_ack_os_reg(XivePresenter *xptr, XiveTCTX *tctx,
389                                    hwaddr offset, unsigned size)
390 {
391     return xive_tctx_accept(tctx, TM_QW1_OS);
392 }
393 
394 static void xive_tm_set_os_cppr(XivePresenter *xptr, XiveTCTX *tctx,
395                                 hwaddr offset, uint64_t value, unsigned size)
396 {
397     xive_tctx_set_cppr(tctx, TM_QW1_OS, value & 0xff);
398 }
399 
400 static void xive_tctx_set_lgs(XiveTCTX *tctx, uint8_t ring, uint8_t lgs)
401 {
402     uint8_t *regs = &tctx->regs[ring];
403 
404     regs[TM_LGS] = lgs;
405 }
406 
407 static void xive_tm_set_os_lgs(XivePresenter *xptr, XiveTCTX *tctx,
408                           hwaddr offset, uint64_t value, unsigned size)
409 {
410     xive_tctx_set_lgs(tctx, TM_QW1_OS, value & 0xff);
411 }
412 
413 /*
414  * Adjust the IPB to allow a CPU to process event queues of other
415  * priorities during one physical interrupt cycle.
416  */
417 static void xive_tm_set_os_pending(XivePresenter *xptr, XiveTCTX *tctx,
418                                    hwaddr offset, uint64_t value, unsigned size)
419 {
420     xive_tctx_ipb_update(tctx, TM_QW1_OS, xive_priority_to_ipb(value & 0xff));
421 }
422 
423 static void xive_os_cam_decode(uint32_t cam, uint8_t *nvt_blk,
424                                uint32_t *nvt_idx, bool *vo)
425 {
426     if (nvt_blk) {
427         *nvt_blk = xive_nvt_blk(cam);
428     }
429     if (nvt_idx) {
430         *nvt_idx = xive_nvt_idx(cam);
431     }
432     if (vo) {
433         *vo = !!(cam & TM_QW1W2_VO);
434     }
435 }
436 
437 static uint32_t xive_tctx_get_os_cam(XiveTCTX *tctx, uint8_t *nvt_blk,
438                                      uint32_t *nvt_idx, bool *vo)
439 {
440     uint32_t qw1w2 = xive_tctx_word2(&tctx->regs[TM_QW1_OS]);
441     uint32_t cam = be32_to_cpu(qw1w2);
442 
443     xive_os_cam_decode(cam, nvt_blk, nvt_idx, vo);
444     return qw1w2;
445 }
446 
447 static void xive_tctx_set_os_cam(XiveTCTX *tctx, uint32_t qw1w2)
448 {
449     memcpy(&tctx->regs[TM_QW1_OS + TM_WORD2], &qw1w2, 4);
450 }
451 
452 static uint64_t xive_tm_pull_os_ctx(XivePresenter *xptr, XiveTCTX *tctx,
453                                     hwaddr offset, unsigned size)
454 {
455     uint32_t qw1w2;
456     uint32_t qw1w2_new;
457     uint8_t nvt_blk;
458     uint32_t nvt_idx;
459     bool vo;
460 
461     qw1w2 = xive_tctx_get_os_cam(tctx, &nvt_blk, &nvt_idx, &vo);
462 
463     if (!vo) {
464         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: pulling invalid NVT %x/%x !?\n",
465                       nvt_blk, nvt_idx);
466     }
467 
468     /* Invalidate CAM line */
469     qw1w2_new = xive_set_field32(TM_QW1W2_VO, qw1w2, 0);
470     xive_tctx_set_os_cam(tctx, qw1w2_new);
471 
472     xive_tctx_reset_signal(tctx, TM_QW1_OS);
473     return qw1w2;
474 }
475 
476 static void xive_tctx_need_resend(XiveRouter *xrtr, XiveTCTX *tctx,
477                                   uint8_t nvt_blk, uint32_t nvt_idx)
478 {
479     XiveNVT nvt;
480     uint8_t ipb;
481 
482     /*
483      * Grab the associated NVT to pull the pending bits, and merge
484      * them with the IPB of the thread interrupt context registers
485      */
486     if (xive_router_get_nvt(xrtr, nvt_blk, nvt_idx, &nvt)) {
487         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid NVT %x/%x\n",
488                           nvt_blk, nvt_idx);
489         return;
490     }
491 
492     ipb = xive_get_field32(NVT_W4_IPB, nvt.w4);
493 
494     if (ipb) {
495         /* Reset the NVT value */
496         nvt.w4 = xive_set_field32(NVT_W4_IPB, nvt.w4, 0);
497         xive_router_write_nvt(xrtr, nvt_blk, nvt_idx, &nvt, 4);
498     }
499     /*
500      * Always call xive_tctx_ipb_update(). Even if there were no
501      * escalation triggered, there could be a pending interrupt which
502      * was saved when the context was pulled and that we need to take
503      * into account by recalculating the PIPR (which is not
504      * saved/restored).
505      * It will also raise the External interrupt signal if needed.
506      */
507     xive_tctx_ipb_update(tctx, TM_QW1_OS, ipb);
508 }
509 
510 /*
511  * Updating the OS CAM line can trigger a resend of interrupt
512  */
513 static void xive_tm_push_os_ctx(XivePresenter *xptr, XiveTCTX *tctx,
514                                 hwaddr offset, uint64_t value, unsigned size)
515 {
516     uint32_t cam = value;
517     uint32_t qw1w2 = cpu_to_be32(cam);
518     uint8_t nvt_blk;
519     uint32_t nvt_idx;
520     bool vo;
521 
522     xive_os_cam_decode(cam, &nvt_blk, &nvt_idx, &vo);
523 
524     /* First update the registers */
525     xive_tctx_set_os_cam(tctx, qw1w2);
526 
527     /* Check the interrupt pending bits */
528     if (vo) {
529         xive_tctx_need_resend(XIVE_ROUTER(xptr), tctx, nvt_blk, nvt_idx);
530     }
531 }
532 
533 static uint32_t xive_presenter_get_config(XivePresenter *xptr)
534 {
535     XivePresenterClass *xpc = XIVE_PRESENTER_GET_CLASS(xptr);
536 
537     return xpc->get_config(xptr);
538 }
539 
540 /*
541  * Define a mapping of "special" operations depending on the TIMA page
542  * offset and the size of the operation.
543  */
544 typedef struct XiveTmOp {
545     uint8_t  page_offset;
546     uint32_t op_offset;
547     unsigned size;
548     void     (*write_handler)(XivePresenter *xptr, XiveTCTX *tctx,
549                               hwaddr offset,
550                               uint64_t value, unsigned size);
551     uint64_t (*read_handler)(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset,
552                              unsigned size);
553 } XiveTmOp;
554 
555 static const XiveTmOp xive_tm_operations[] = {
556     /*
557      * MMIOs below 2K : raw values and special operations without side
558      * effects
559      */
560     { XIVE_TM_OS_PAGE, TM_QW1_OS + TM_CPPR,       1, xive_tm_set_os_cppr,
561                                                      NULL },
562     { XIVE_TM_HV_PAGE, TM_QW1_OS + TM_WORD2,      4, xive_tm_push_os_ctx,
563                                                      NULL },
564     { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_CPPR,  1, xive_tm_set_hv_cppr,
565                                                      NULL },
566     { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_WORD2, 1, xive_tm_vt_push,
567                                                      NULL },
568     { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_WORD2, 1, NULL,
569                                                      xive_tm_vt_poll },
570 
571     /* MMIOs above 2K : special operations with side effects */
572     { XIVE_TM_OS_PAGE, TM_SPC_ACK_OS_REG,         2, NULL,
573                                                      xive_tm_ack_os_reg },
574     { XIVE_TM_OS_PAGE, TM_SPC_SET_OS_PENDING,     1, xive_tm_set_os_pending,
575                                                      NULL },
576     { XIVE_TM_HV_PAGE, TM_SPC_PULL_OS_CTX,        4, NULL,
577                                                      xive_tm_pull_os_ctx },
578     { XIVE_TM_HV_PAGE, TM_SPC_PULL_OS_CTX,        8, NULL,
579                                                      xive_tm_pull_os_ctx },
580     { XIVE_TM_HV_PAGE, TM_SPC_ACK_HV_REG,         2, NULL,
581                                                      xive_tm_ack_hv_reg },
582     { XIVE_TM_HV_PAGE, TM_SPC_PULL_POOL_CTX,      4, NULL,
583                                                      xive_tm_pull_pool_ctx },
584     { XIVE_TM_HV_PAGE, TM_SPC_PULL_POOL_CTX,      8, NULL,
585                                                      xive_tm_pull_pool_ctx },
586     { XIVE_TM_HV_PAGE, TM_SPC_PULL_PHYS_CTX,      1, NULL,
587                                                      xive_tm_pull_phys_ctx },
588 };
589 
590 static const XiveTmOp xive2_tm_operations[] = {
591     /*
592      * MMIOs below 2K : raw values and special operations without side
593      * effects
594      */
595     { XIVE_TM_OS_PAGE, TM_QW1_OS + TM_CPPR,       1, xive_tm_set_os_cppr,
596                                                      NULL },
597     { XIVE_TM_HV_PAGE, TM_QW1_OS + TM_WORD2,      4, xive2_tm_push_os_ctx,
598                                                      NULL },
599     { XIVE_TM_HV_PAGE, TM_QW1_OS + TM_WORD2,      8, xive2_tm_push_os_ctx,
600                                                      NULL },
601     { XIVE_TM_OS_PAGE, TM_QW1_OS + TM_LGS,        1, xive_tm_set_os_lgs,
602                                                      NULL },
603     { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_CPPR,  1, xive_tm_set_hv_cppr,
604                                                      NULL },
605     { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_WORD2, 1, xive_tm_vt_push,
606                                                      NULL },
607     { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_WORD2, 1, NULL,
608                                                      xive_tm_vt_poll },
609     { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_T,     1, xive2_tm_set_hv_target,
610                                                      NULL },
611 
612     /* MMIOs above 2K : special operations with side effects */
613     { XIVE_TM_OS_PAGE, TM_SPC_ACK_OS_REG,         2, NULL,
614                                                      xive_tm_ack_os_reg },
615     { XIVE_TM_OS_PAGE, TM_SPC_SET_OS_PENDING,     1, xive_tm_set_os_pending,
616                                                      NULL },
617     { XIVE_TM_HV_PAGE, TM_SPC_PULL_OS_CTX_G2,     4, NULL,
618                                                      xive2_tm_pull_os_ctx },
619     { XIVE_TM_HV_PAGE, TM_SPC_PULL_OS_CTX,        4, NULL,
620                                                      xive2_tm_pull_os_ctx },
621     { XIVE_TM_HV_PAGE, TM_SPC_PULL_OS_CTX,        8, NULL,
622                                                      xive2_tm_pull_os_ctx },
623     { XIVE_TM_HV_PAGE, TM_SPC_ACK_HV_REG,         2, NULL,
624                                                      xive_tm_ack_hv_reg },
625     { XIVE_TM_HV_PAGE, TM_SPC_PULL_POOL_CTX_G2,   4, NULL,
626                                                      xive_tm_pull_pool_ctx },
627     { XIVE_TM_HV_PAGE, TM_SPC_PULL_POOL_CTX,      4, NULL,
628                                                      xive_tm_pull_pool_ctx },
629     { XIVE_TM_HV_PAGE, TM_SPC_PULL_POOL_CTX,      8, NULL,
630                                                      xive_tm_pull_pool_ctx },
631     { XIVE_TM_HV_PAGE, TM_SPC_PULL_OS_CTX_OL,     1, xive2_tm_pull_os_ctx_ol,
632                                                      NULL },
633     { XIVE_TM_HV_PAGE, TM_SPC_PULL_PHYS_CTX_G2,   4, NULL,
634                                                      xive_tm_pull_phys_ctx },
635     { XIVE_TM_HV_PAGE, TM_SPC_PULL_PHYS_CTX,      1, NULL,
636                                                      xive_tm_pull_phys_ctx },
637     { XIVE_TM_HV_PAGE, TM_SPC_PULL_PHYS_CTX_OL,   1, xive2_tm_pull_phys_ctx_ol,
638                                                      NULL },
639 };
640 
641 static const XiveTmOp *xive_tm_find_op(XivePresenter *xptr, hwaddr offset,
642                                        unsigned size, bool write)
643 {
644     uint8_t page_offset = (offset >> TM_SHIFT) & 0x3;
645     uint32_t op_offset = offset & TM_ADDRESS_MASK;
646     const XiveTmOp *tm_ops;
647     int i, tm_ops_count;
648     uint32_t cfg;
649 
650     cfg = xive_presenter_get_config(xptr);
651     if (cfg & XIVE_PRESENTER_GEN1_TIMA_OS) {
652         tm_ops = xive_tm_operations;
653         tm_ops_count = ARRAY_SIZE(xive_tm_operations);
654     } else {
655         tm_ops = xive2_tm_operations;
656         tm_ops_count = ARRAY_SIZE(xive2_tm_operations);
657     }
658 
659     for (i = 0; i < tm_ops_count; i++) {
660         const XiveTmOp *xto = &tm_ops[i];
661 
662         /* Accesses done from a more privileged TIMA page is allowed */
663         if (xto->page_offset >= page_offset &&
664             xto->op_offset == op_offset &&
665             xto->size == size &&
666             ((write && xto->write_handler) || (!write && xto->read_handler))) {
667             return xto;
668         }
669     }
670     return NULL;
671 }
672 
673 /*
674  * TIMA MMIO handlers
675  */
676 void xive_tctx_tm_write(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset,
677                         uint64_t value, unsigned size)
678 {
679     const XiveTmOp *xto;
680 
681     trace_xive_tctx_tm_write(tctx->cs->cpu_index, offset, size, value);
682 
683     /*
684      * TODO: check V bit in Q[0-3]W2
685      */
686 
687     /*
688      * First, check for special operations in the 2K region
689      */
690     if (offset & TM_SPECIAL_OP) {
691         xto = xive_tm_find_op(tctx->xptr, offset, size, true);
692         if (!xto) {
693             qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid write access at TIMA "
694                           "@%"HWADDR_PRIx"\n", offset);
695         } else {
696             xto->write_handler(xptr, tctx, offset, value, size);
697         }
698         return;
699     }
700 
701     /*
702      * Then, for special operations in the region below 2K.
703      */
704     xto = xive_tm_find_op(tctx->xptr, offset, size, true);
705     if (xto) {
706         xto->write_handler(xptr, tctx, offset, value, size);
707         return;
708     }
709 
710     /*
711      * Finish with raw access to the register values
712      */
713     xive_tm_raw_write(tctx, offset, value, size);
714 }
715 
716 uint64_t xive_tctx_tm_read(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset,
717                            unsigned size)
718 {
719     const XiveTmOp *xto;
720     uint64_t ret;
721 
722     /*
723      * TODO: check V bit in Q[0-3]W2
724      */
725 
726     /*
727      * First, check for special operations in the 2K region
728      */
729     if (offset & TM_SPECIAL_OP) {
730         xto = xive_tm_find_op(tctx->xptr, offset, size, false);
731         if (!xto) {
732             qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid read access to TIMA"
733                           "@%"HWADDR_PRIx"\n", offset);
734             return -1;
735         }
736         ret = xto->read_handler(xptr, tctx, offset, size);
737         goto out;
738     }
739 
740     /*
741      * Then, for special operations in the region below 2K.
742      */
743     xto = xive_tm_find_op(tctx->xptr, offset, size, false);
744     if (xto) {
745         ret = xto->read_handler(xptr, tctx, offset, size);
746         goto out;
747     }
748 
749     /*
750      * Finish with raw access to the register values
751      */
752     ret = xive_tm_raw_read(tctx, offset, size);
753 out:
754     trace_xive_tctx_tm_read(tctx->cs->cpu_index, offset, size, ret);
755     return ret;
756 }
757 
758 static char *xive_tctx_ring_print(uint8_t *ring)
759 {
760     uint32_t w2 = xive_tctx_word2(ring);
761 
762     return g_strdup_printf("%02x   %02x  %02x    %02x   %02x  "
763                    "%02x  %02x   %02x  %08x",
764                    ring[TM_NSR], ring[TM_CPPR], ring[TM_IPB], ring[TM_LSMFB],
765                    ring[TM_ACK_CNT], ring[TM_INC], ring[TM_AGE], ring[TM_PIPR],
766                    be32_to_cpu(w2));
767 }
768 
/* Ring labels used in the register dump, in ring order */
static const char * const xive_tctx_ring_names[] = {
    "USER",
    "OS",
    "POOL",
    "PHYS",
};
772 
/*
 * kvm_irqchip_in_kernel() will cause the compiler to turn this
 * into a nop if CONFIG_KVM isn't defined.
 */
#define xive_in_kernel(xptr)                                            \
    (kvm_irqchip_in_kernel() &&                                         \
     ({                                                                 \
         XivePresenterClass *xpc_ = XIVE_PRESENTER_GET_CLASS(xptr);     \
         /* A class without the hook is never handled in kernel */      \
         xpc_->in_kernel && xpc_->in_kernel(xptr);                      \
     }))
783 
784 void xive_tctx_pic_print_info(XiveTCTX *tctx, GString *buf)
785 {
786     int cpu_index;
787     int i;
788 
789     /* Skip partially initialized vCPUs. This can happen on sPAPR when vCPUs
790      * are hot plugged or unplugged.
791      */
792     if (!tctx) {
793         return;
794     }
795 
796     cpu_index = tctx->cs ? tctx->cs->cpu_index : -1;
797 
798     if (xive_in_kernel(tctx->xptr)) {
799         Error *local_err = NULL;
800 
801         kvmppc_xive_cpu_synchronize_state(tctx, &local_err);
802         if (local_err) {
803             error_report_err(local_err);
804             return;
805         }
806     }
807 
808     if (xive_presenter_get_config(tctx->xptr) & XIVE_PRESENTER_GEN1_TIMA_OS) {
809         g_string_append_printf(buf, "CPU[%04x]:   "
810                                "QW   NSR CPPR IPB LSMFB ACK# INC AGE PIPR"
811                                "  W2\n", cpu_index);
812     } else {
813         g_string_append_printf(buf, "CPU[%04x]:   "
814                                "QW   NSR CPPR IPB LSMFB   -  LGS  T  PIPR"
815                                "  W2\n", cpu_index);
816     }
817 
818     for (i = 0; i < XIVE_TM_RING_COUNT; i++) {
819         char *s = xive_tctx_ring_print(&tctx->regs[i * XIVE_TM_RING_SIZE]);
820         g_string_append_printf(buf, "CPU[%04x]: %4s    %s\n",
821                                cpu_index, xive_tctx_ring_names[i], s);
822         g_free(s);
823     }
824 }
825 
826 void xive_tctx_reset(XiveTCTX *tctx)
827 {
828     memset(tctx->regs, 0, sizeof(tctx->regs));
829 
830     /* Set some defaults */
831     tctx->regs[TM_QW1_OS + TM_LSMFB] = 0xFF;
832     tctx->regs[TM_QW1_OS + TM_ACK_CNT] = 0xFF;
833     tctx->regs[TM_QW1_OS + TM_AGE] = 0xFF;
834     if (!(xive_presenter_get_config(tctx->xptr) &
835           XIVE_PRESENTER_GEN1_TIMA_OS)) {
836         tctx->regs[TM_QW1_OS + TM_OGEN] = 2;
837     }
838 
839     /*
840      * Initialize PIPR to 0xFF to avoid phantom interrupts when the
841      * CPPR is first set.
842      */
843     tctx->regs[TM_QW1_OS + TM_PIPR] =
844         ipb_to_pipr(tctx->regs[TM_QW1_OS + TM_IPB]);
845     tctx->regs[TM_QW3_HV_PHYS + TM_PIPR] =
846         ipb_to_pipr(tctx->regs[TM_QW3_HV_PHYS + TM_IPB]);
847 }
848 
849 static void xive_tctx_realize(DeviceState *dev, Error **errp)
850 {
851     XiveTCTX *tctx = XIVE_TCTX(dev);
852     PowerPCCPU *cpu;
853     CPUPPCState *env;
854 
855     assert(tctx->cs);
856     assert(tctx->xptr);
857 
858     cpu = POWERPC_CPU(tctx->cs);
859     env = &cpu->env;
860     switch (PPC_INPUT(env)) {
861     case PPC_FLAGS_INPUT_POWER9:
862         tctx->hv_output = qdev_get_gpio_in(DEVICE(cpu), POWER9_INPUT_HINT);
863         tctx->os_output = qdev_get_gpio_in(DEVICE(cpu), POWER9_INPUT_INT);
864         break;
865 
866     default:
867         error_setg(errp, "XIVE interrupt controller does not support "
868                    "this CPU bus model");
869         return;
870     }
871 
872     /* Connect the presenter to the VCPU (required for CPU hotplug) */
873     if (xive_in_kernel(tctx->xptr)) {
874         if (kvmppc_xive_cpu_connect(tctx, errp) < 0) {
875             return;
876         }
877     }
878 }
879 
880 static int vmstate_xive_tctx_pre_save(void *opaque)
881 {
882     XiveTCTX *tctx = XIVE_TCTX(opaque);
883     Error *local_err = NULL;
884     int ret;
885 
886     if (xive_in_kernel(tctx->xptr)) {
887         ret = kvmppc_xive_cpu_get_state(tctx, &local_err);
888         if (ret < 0) {
889             error_report_err(local_err);
890             return ret;
891         }
892     }
893 
894     return 0;
895 }
896 
897 static int vmstate_xive_tctx_post_load(void *opaque, int version_id)
898 {
899     XiveTCTX *tctx = XIVE_TCTX(opaque);
900     Error *local_err = NULL;
901     int ret;
902 
903     if (xive_in_kernel(tctx->xptr)) {
904         /*
905          * Required for hotplugged CPU, for which the state comes
906          * after all states of the machine.
907          */
908         ret = kvmppc_xive_cpu_set_state(tctx, &local_err);
909         if (ret < 0) {
910             error_report_err(local_err);
911             return ret;
912         }
913     }
914 
915     return 0;
916 }
917 
918 static const VMStateDescription vmstate_xive_tctx = {
919     .name = TYPE_XIVE_TCTX,
920     .version_id = 1,
921     .minimum_version_id = 1,
922     .pre_save = vmstate_xive_tctx_pre_save,
923     .post_load = vmstate_xive_tctx_post_load,
924     .fields = (const VMStateField[]) {
925         VMSTATE_BUFFER(regs, XiveTCTX),
926         VMSTATE_END_OF_LIST()
927     },
928 };
929 
930 static const Property xive_tctx_properties[] = {
931     DEFINE_PROP_LINK("cpu", XiveTCTX, cs, TYPE_CPU, CPUState *),
932     DEFINE_PROP_LINK("presenter", XiveTCTX, xptr, TYPE_XIVE_PRESENTER,
933                      XivePresenter *),
934 };
935 
936 static void xive_tctx_class_init(ObjectClass *klass, void *data)
937 {
938     DeviceClass *dc = DEVICE_CLASS(klass);
939 
940     dc->desc = "XIVE Interrupt Thread Context";
941     dc->realize = xive_tctx_realize;
942     dc->vmsd = &vmstate_xive_tctx;
943     device_class_set_props(dc, xive_tctx_properties);
944     /*
945      * Reason: part of XIVE interrupt controller, needs to be wired up
946      * by xive_tctx_create().
947      */
948     dc->user_creatable = false;
949 }
950 
951 static const TypeInfo xive_tctx_info = {
952     .name          = TYPE_XIVE_TCTX,
953     .parent        = TYPE_DEVICE,
954     .instance_size = sizeof(XiveTCTX),
955     .class_init    = xive_tctx_class_init,
956 };
957 
958 Object *xive_tctx_create(Object *cpu, XivePresenter *xptr, Error **errp)
959 {
960     Object *obj;
961 
962     obj = object_new(TYPE_XIVE_TCTX);
963     object_property_add_child(cpu, TYPE_XIVE_TCTX, obj);
964     object_unref(obj);
965     object_property_set_link(obj, "cpu", cpu, &error_abort);
966     object_property_set_link(obj, "presenter", OBJECT(xptr), &error_abort);
967     if (!qdev_realize(DEVICE(obj), NULL, errp)) {
968         object_unparent(obj);
969         return NULL;
970     }
971     return obj;
972 }
973 
974 void xive_tctx_destroy(XiveTCTX *tctx)
975 {
976     Object *obj = OBJECT(tctx);
977 
978     object_unparent(obj);
979 }
980 
981 /*
982  * XIVE ESB helpers
983  */
984 
/*
 * Replace the two low-order (PQ) bits of *@pq with the two low-order
 * bits of @value, leaving the other bits of *@pq untouched.
 *
 * Returns the previous value of the two PQ bits.
 */
uint8_t xive_esb_set(uint8_t *pq, uint8_t value)
{
    const uint8_t pq_mask = 0x3;
    uint8_t previous = *pq & pq_mask;

    *pq = (*pq & ~pq_mask) | (value & pq_mask);

    return previous;
}
994 
995 bool xive_esb_trigger(uint8_t *pq)
996 {
997     uint8_t old_pq = *pq & 0x3;
998 
999     switch (old_pq) {
1000     case XIVE_ESB_RESET:
1001         xive_esb_set(pq, XIVE_ESB_PENDING);
1002         return true;
1003     case XIVE_ESB_PENDING:
1004     case XIVE_ESB_QUEUED:
1005         xive_esb_set(pq, XIVE_ESB_QUEUED);
1006         return false;
1007     case XIVE_ESB_OFF:
1008         xive_esb_set(pq, XIVE_ESB_OFF);
1009         return false;
1010     default:
1011          g_assert_not_reached();
1012     }
1013 }
1014 
1015 bool xive_esb_eoi(uint8_t *pq)
1016 {
1017     uint8_t old_pq = *pq & 0x3;
1018 
1019     switch (old_pq) {
1020     case XIVE_ESB_RESET:
1021     case XIVE_ESB_PENDING:
1022         xive_esb_set(pq, XIVE_ESB_RESET);
1023         return false;
1024     case XIVE_ESB_QUEUED:
1025         xive_esb_set(pq, XIVE_ESB_PENDING);
1026         return true;
1027     case XIVE_ESB_OFF:
1028         xive_esb_set(pq, XIVE_ESB_OFF);
1029         return false;
1030     default:
1031          g_assert_not_reached();
1032     }
1033 }
1034 
1035 /*
1036  * XIVE Interrupt Source (or IVSE)
1037  */
1038 
1039 uint8_t xive_source_esb_get(XiveSource *xsrc, uint32_t srcno)
1040 {
1041     assert(srcno < xsrc->nr_irqs);
1042 
1043     return xsrc->status[srcno] & 0x3;
1044 }
1045 
1046 uint8_t xive_source_esb_set(XiveSource *xsrc, uint32_t srcno, uint8_t pq)
1047 {
1048     assert(srcno < xsrc->nr_irqs);
1049 
1050     return xive_esb_set(&xsrc->status[srcno], pq);
1051 }
1052 
1053 /*
1054  * Returns whether the event notification should be forwarded.
1055  */
1056 static bool xive_source_lsi_trigger(XiveSource *xsrc, uint32_t srcno)
1057 {
1058     uint8_t old_pq = xive_source_esb_get(xsrc, srcno);
1059 
1060     xive_source_set_asserted(xsrc, srcno, true);
1061 
1062     switch (old_pq) {
1063     case XIVE_ESB_RESET:
1064         xive_source_esb_set(xsrc, srcno, XIVE_ESB_PENDING);
1065         return true;
1066     default:
1067         return false;
1068     }
1069 }
1070 
1071 /*
1072  * Sources can be configured with PQ offloading in which case the check
1073  * on the PQ state bits of MSIs is disabled
1074  */
1075 static bool xive_source_esb_disabled(XiveSource *xsrc, uint32_t srcno)
1076 {
1077     return (xsrc->esb_flags & XIVE_SRC_PQ_DISABLE) &&
1078         !xive_source_irq_is_lsi(xsrc, srcno);
1079 }
1080 
1081 /*
1082  * Returns whether the event notification should be forwarded.
1083  */
1084 static bool xive_source_esb_trigger(XiveSource *xsrc, uint32_t srcno)
1085 {
1086     bool ret;
1087 
1088     assert(srcno < xsrc->nr_irqs);
1089 
1090     if (xive_source_esb_disabled(xsrc, srcno)) {
1091         return true;
1092     }
1093 
1094     ret = xive_esb_trigger(&xsrc->status[srcno]);
1095 
1096     if (xive_source_irq_is_lsi(xsrc, srcno) &&
1097         xive_source_esb_get(xsrc, srcno) == XIVE_ESB_QUEUED) {
1098         qemu_log_mask(LOG_GUEST_ERROR,
1099                       "XIVE: queued an event on LSI IRQ %d\n", srcno);
1100     }
1101 
1102     return ret;
1103 }
1104 
1105 /*
1106  * Returns whether the event notification should be forwarded.
1107  */
1108 static bool xive_source_esb_eoi(XiveSource *xsrc, uint32_t srcno)
1109 {
1110     bool ret;
1111 
1112     assert(srcno < xsrc->nr_irqs);
1113 
1114     if (xive_source_esb_disabled(xsrc, srcno)) {
1115         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid EOI for IRQ %d\n", srcno);
1116         return false;
1117     }
1118 
1119     ret = xive_esb_eoi(&xsrc->status[srcno]);
1120 
1121     /*
1122      * LSI sources do not set the Q bit but they can still be
1123      * asserted, in which case we should forward a new event
1124      * notification
1125      */
1126     if (xive_source_irq_is_lsi(xsrc, srcno) &&
1127         xive_source_is_asserted(xsrc, srcno)) {
1128         ret = xive_source_lsi_trigger(xsrc, srcno);
1129     }
1130 
1131     return ret;
1132 }
1133 
1134 /*
1135  * Forward the source event notification to the Router
1136  */
1137 static void xive_source_notify(XiveSource *xsrc, int srcno)
1138 {
1139     XiveNotifierClass *xnc = XIVE_NOTIFIER_GET_CLASS(xsrc->xive);
1140     bool pq_checked = !xive_source_esb_disabled(xsrc, srcno);
1141 
1142     if (xnc->notify) {
1143         xnc->notify(xsrc->xive, srcno, pq_checked);
1144     }
1145 }
1146 
1147 /*
1148  * In a two pages ESB MMIO setting, even page is the trigger page, odd
1149  * page is for management
1150  */
1151 static inline bool addr_is_even(hwaddr addr, uint32_t shift)
1152 {
1153     return !((addr >> shift) & 1);
1154 }
1155 
1156 static inline bool xive_source_is_trigger_page(XiveSource *xsrc, hwaddr addr)
1157 {
1158     return xive_source_esb_has_2page(xsrc) &&
1159         addr_is_even(addr, xsrc->esb_shift - 1);
1160 }
1161 
1162 /*
1163  * ESB MMIO loads
1164  *                      Trigger page    Management/EOI page
1165  *
1166  * ESB MMIO setting     2 pages         1 or 2 pages
1167  *
1168  * 0x000 .. 0x3FF       -1              EOI and return 0|1
1169  * 0x400 .. 0x7FF       -1              EOI and return 0|1
1170  * 0x800 .. 0xBFF       -1              return PQ
1171  * 0xC00 .. 0xCFF       -1              return PQ and atomically PQ=00
1172  * 0xD00 .. 0xDFF       -1              return PQ and atomically PQ=01
1173  * 0xE00 .. 0xDFF       -1              return PQ and atomically PQ=10
1174  * 0xF00 .. 0xDFF       -1              return PQ and atomically PQ=11
1175  */
1176 static uint64_t xive_source_esb_read(void *opaque, hwaddr addr, unsigned size)
1177 {
1178     XiveSource *xsrc = XIVE_SOURCE(opaque);
1179     uint32_t offset = addr & 0xFFF;
1180     uint32_t srcno = addr >> xsrc->esb_shift;
1181     uint64_t ret = -1;
1182 
1183     /* In a two pages ESB MMIO setting, trigger page should not be read */
1184     if (xive_source_is_trigger_page(xsrc, addr)) {
1185         qemu_log_mask(LOG_GUEST_ERROR,
1186                       "XIVE: invalid load on IRQ %d trigger page at "
1187                       "0x%"HWADDR_PRIx"\n", srcno, addr);
1188         return -1;
1189     }
1190 
1191     switch (offset) {
1192     case XIVE_ESB_LOAD_EOI ... XIVE_ESB_LOAD_EOI + 0x7FF:
1193         ret = xive_source_esb_eoi(xsrc, srcno);
1194 
1195         /* Forward the source event notification for routing */
1196         if (ret) {
1197             xive_source_notify(xsrc, srcno);
1198         }
1199         break;
1200 
1201     case XIVE_ESB_GET ... XIVE_ESB_GET + 0x3FF:
1202         ret = xive_source_esb_get(xsrc, srcno);
1203         break;
1204 
1205     case XIVE_ESB_SET_PQ_00 ... XIVE_ESB_SET_PQ_00 + 0x0FF:
1206     case XIVE_ESB_SET_PQ_01 ... XIVE_ESB_SET_PQ_01 + 0x0FF:
1207     case XIVE_ESB_SET_PQ_10 ... XIVE_ESB_SET_PQ_10 + 0x0FF:
1208     case XIVE_ESB_SET_PQ_11 ... XIVE_ESB_SET_PQ_11 + 0x0FF:
1209         ret = xive_source_esb_set(xsrc, srcno, (offset >> 8) & 0x3);
1210         break;
1211     default:
1212         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid ESB load addr %x\n",
1213                       offset);
1214     }
1215 
1216     trace_xive_source_esb_read(addr, srcno, ret);
1217 
1218     return ret;
1219 }
1220 
1221 /*
1222  * ESB MMIO stores
1223  *                      Trigger page    Management/EOI page
1224  *
1225  * ESB MMIO setting     2 pages         1 or 2 pages
1226  *
1227  * 0x000 .. 0x3FF       Trigger         Trigger
1228  * 0x400 .. 0x7FF       Trigger         EOI
1229  * 0x800 .. 0xBFF       Trigger         undefined
1230  * 0xC00 .. 0xCFF       Trigger         PQ=00
1231  * 0xD00 .. 0xDFF       Trigger         PQ=01
1232  * 0xE00 .. 0xDFF       Trigger         PQ=10
1233  * 0xF00 .. 0xDFF       Trigger         PQ=11
1234  */
1235 static void xive_source_esb_write(void *opaque, hwaddr addr,
1236                                   uint64_t value, unsigned size)
1237 {
1238     XiveSource *xsrc = XIVE_SOURCE(opaque);
1239     uint32_t offset = addr & 0xFFF;
1240     uint32_t srcno = addr >> xsrc->esb_shift;
1241     bool notify = false;
1242 
1243     trace_xive_source_esb_write(addr, srcno, value);
1244 
1245     /* In a two pages ESB MMIO setting, trigger page only triggers */
1246     if (xive_source_is_trigger_page(xsrc, addr)) {
1247         notify = xive_source_esb_trigger(xsrc, srcno);
1248         goto out;
1249     }
1250 
1251     switch (offset) {
1252     case 0 ... 0x3FF:
1253         notify = xive_source_esb_trigger(xsrc, srcno);
1254         break;
1255 
1256     case XIVE_ESB_STORE_EOI ... XIVE_ESB_STORE_EOI + 0x3FF:
1257         if (!(xsrc->esb_flags & XIVE_SRC_STORE_EOI)) {
1258             qemu_log_mask(LOG_GUEST_ERROR,
1259                           "XIVE: invalid Store EOI for IRQ %d\n", srcno);
1260             return;
1261         }
1262 
1263         notify = xive_source_esb_eoi(xsrc, srcno);
1264         break;
1265 
1266     /*
1267      * This is an internal offset used to inject triggers when the PQ
1268      * state bits are not controlled locally. Such as for LSIs when
1269      * under ABT mode.
1270      */
1271     case XIVE_ESB_INJECT ... XIVE_ESB_INJECT + 0x3FF:
1272         notify = true;
1273         break;
1274 
1275     case XIVE_ESB_SET_PQ_00 ... XIVE_ESB_SET_PQ_00 + 0x0FF:
1276     case XIVE_ESB_SET_PQ_01 ... XIVE_ESB_SET_PQ_01 + 0x0FF:
1277     case XIVE_ESB_SET_PQ_10 ... XIVE_ESB_SET_PQ_10 + 0x0FF:
1278     case XIVE_ESB_SET_PQ_11 ... XIVE_ESB_SET_PQ_11 + 0x0FF:
1279         xive_source_esb_set(xsrc, srcno, (offset >> 8) & 0x3);
1280         break;
1281 
1282     default:
1283         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid ESB write addr %x\n",
1284                       offset);
1285         return;
1286     }
1287 
1288 out:
1289     /* Forward the source event notification for routing */
1290     if (notify) {
1291         xive_source_notify(xsrc, srcno);
1292     }
1293 }
1294 
1295 static const MemoryRegionOps xive_source_esb_ops = {
1296     .read = xive_source_esb_read,
1297     .write = xive_source_esb_write,
1298     .endianness = DEVICE_BIG_ENDIAN,
1299     .valid = {
1300         .min_access_size = 1,
1301         .max_access_size = 8,
1302     },
1303     .impl = {
1304         .min_access_size = 1,
1305         .max_access_size = 8,
1306     },
1307 };
1308 
1309 void xive_source_set_irq(void *opaque, int srcno, int val)
1310 {
1311     XiveSource *xsrc = XIVE_SOURCE(opaque);
1312     bool notify = false;
1313 
1314     if (xive_source_irq_is_lsi(xsrc, srcno)) {
1315         if (val) {
1316             notify = xive_source_lsi_trigger(xsrc, srcno);
1317         } else {
1318             xive_source_set_asserted(xsrc, srcno, false);
1319         }
1320     } else {
1321         if (val) {
1322             notify = xive_source_esb_trigger(xsrc, srcno);
1323         }
1324     }
1325 
1326     /* Forward the source event notification for routing */
1327     if (notify) {
1328         xive_source_notify(xsrc, srcno);
1329     }
1330 }
1331 
1332 void xive_source_pic_print_info(XiveSource *xsrc, uint32_t offset, GString *buf)
1333 {
1334     for (unsigned i = 0; i < xsrc->nr_irqs; i++) {
1335         uint8_t pq = xive_source_esb_get(xsrc, i);
1336 
1337         if (pq == XIVE_ESB_OFF) {
1338             continue;
1339         }
1340 
1341         g_string_append_printf(buf, "  %08x %s %c%c%c\n", i + offset,
1342                                xive_source_irq_is_lsi(xsrc, i) ? "LSI" : "MSI",
1343                                pq & XIVE_ESB_VAL_P ? 'P' : '-',
1344                                pq & XIVE_ESB_VAL_Q ? 'Q' : '-',
1345                                xive_source_is_asserted(xsrc, i) ? 'A' : ' ');
1346     }
1347 }
1348 
1349 static void xive_source_reset(void *dev)
1350 {
1351     XiveSource *xsrc = XIVE_SOURCE(dev);
1352 
1353     /* Do not clear the LSI bitmap */
1354 
1355     memset(xsrc->status, xsrc->reset_pq, xsrc->nr_irqs);
1356 }
1357 
1358 static void xive_source_realize(DeviceState *dev, Error **errp)
1359 {
1360     XiveSource *xsrc = XIVE_SOURCE(dev);
1361     uint64_t esb_len = xive_source_esb_len(xsrc);
1362 
1363     assert(xsrc->xive);
1364 
1365     if (!xsrc->nr_irqs) {
1366         error_setg(errp, "Number of interrupt needs to be greater than 0");
1367         return;
1368     }
1369 
1370     if (xsrc->esb_shift != XIVE_ESB_4K &&
1371         xsrc->esb_shift != XIVE_ESB_4K_2PAGE &&
1372         xsrc->esb_shift != XIVE_ESB_64K &&
1373         xsrc->esb_shift != XIVE_ESB_64K_2PAGE) {
1374         error_setg(errp, "Invalid ESB shift setting");
1375         return;
1376     }
1377 
1378     xsrc->status = g_malloc0(xsrc->nr_irqs);
1379     xsrc->lsi_map = bitmap_new(xsrc->nr_irqs);
1380 
1381     memory_region_init(&xsrc->esb_mmio, OBJECT(xsrc), "xive.esb", esb_len);
1382     memory_region_init_io(&xsrc->esb_mmio_emulated, OBJECT(xsrc),
1383                           &xive_source_esb_ops, xsrc, "xive.esb-emulated",
1384                           esb_len);
1385     memory_region_add_subregion(&xsrc->esb_mmio, 0, &xsrc->esb_mmio_emulated);
1386 
1387     qemu_register_reset(xive_source_reset, dev);
1388 }
1389 
1390 static const VMStateDescription vmstate_xive_source = {
1391     .name = TYPE_XIVE_SOURCE,
1392     .version_id = 1,
1393     .minimum_version_id = 1,
1394     .fields = (const VMStateField[]) {
1395         VMSTATE_UINT32_EQUAL(nr_irqs, XiveSource, NULL),
1396         VMSTATE_VBUFFER_UINT32(status, XiveSource, 1, NULL, nr_irqs),
1397         VMSTATE_END_OF_LIST()
1398     },
1399 };
1400 
1401 /*
1402  * The default XIVE interrupt source setting for the ESB MMIOs is two
1403  * 64k pages without Store EOI, to be in sync with KVM.
1404  */
1405 static const Property xive_source_properties[] = {
1406     DEFINE_PROP_UINT64("flags", XiveSource, esb_flags, 0),
1407     DEFINE_PROP_UINT32("nr-irqs", XiveSource, nr_irqs, 0),
1408     DEFINE_PROP_UINT32("shift", XiveSource, esb_shift, XIVE_ESB_64K_2PAGE),
1409     /*
1410      * By default, PQs are initialized to 0b01 (Q=1) which corresponds
1411      * to "ints off"
1412      */
1413     DEFINE_PROP_UINT8("reset-pq", XiveSource, reset_pq, XIVE_ESB_OFF),
1414     DEFINE_PROP_LINK("xive", XiveSource, xive, TYPE_XIVE_NOTIFIER,
1415                      XiveNotifier *),
1416 };
1417 
1418 static void xive_source_class_init(ObjectClass *klass, void *data)
1419 {
1420     DeviceClass *dc = DEVICE_CLASS(klass);
1421 
1422     dc->desc    = "XIVE Interrupt Source";
1423     device_class_set_props(dc, xive_source_properties);
1424     dc->realize = xive_source_realize;
1425     dc->vmsd    = &vmstate_xive_source;
1426     /*
1427      * Reason: part of XIVE interrupt controller, needs to be wired up,
1428      * e.g. by spapr_xive_instance_init().
1429      */
1430     dc->user_creatable = false;
1431 }
1432 
1433 static const TypeInfo xive_source_info = {
1434     .name          = TYPE_XIVE_SOURCE,
1435     .parent        = TYPE_DEVICE,
1436     .instance_size = sizeof(XiveSource),
1437     .class_init    = xive_source_class_init,
1438 };
1439 
1440 /*
1441  * XiveEND helpers
1442  */
1443 
1444 void xive_end_queue_pic_print_info(XiveEND *end, uint32_t width, GString *buf)
1445 {
1446     uint64_t qaddr_base = xive_end_qaddr(end);
1447     uint32_t qsize = xive_get_field32(END_W0_QSIZE, end->w0);
1448     uint32_t qindex = xive_get_field32(END_W1_PAGE_OFF, end->w1);
1449     uint32_t qentries = 1 << (qsize + 10);
1450     int i;
1451 
1452     /*
1453      * print out the [ (qindex - (width - 1)) .. (qindex + 1)] window
1454      */
1455     g_string_append_printf(buf, " [ ");
1456     qindex = (qindex - (width - 1)) & (qentries - 1);
1457     for (i = 0; i < width; i++) {
1458         uint64_t qaddr = qaddr_base + (qindex << 2);
1459         uint32_t qdata = -1;
1460 
1461         if (dma_memory_read(&address_space_memory, qaddr,
1462                             &qdata, sizeof(qdata), MEMTXATTRS_UNSPECIFIED)) {
1463             qemu_log_mask(LOG_GUEST_ERROR, "XIVE: failed to read EQ @0x%"
1464                           HWADDR_PRIx "\n", qaddr);
1465             return;
1466         }
1467         g_string_append_printf(buf, "%s%08x ", i == width - 1 ? "^" : "",
1468                                be32_to_cpu(qdata));
1469         qindex = (qindex + 1) & (qentries - 1);
1470     }
1471     g_string_append_c(buf, ']');
1472 }
1473 
1474 void xive_end_pic_print_info(XiveEND *end, uint32_t end_idx, GString *buf)
1475 {
1476     uint64_t qaddr_base = xive_end_qaddr(end);
1477     uint32_t qindex = xive_get_field32(END_W1_PAGE_OFF, end->w1);
1478     uint32_t qgen = xive_get_field32(END_W1_GENERATION, end->w1);
1479     uint32_t qsize = xive_get_field32(END_W0_QSIZE, end->w0);
1480     uint32_t qentries = 1 << (qsize + 10);
1481 
1482     uint32_t nvt_blk = xive_get_field32(END_W6_NVT_BLOCK, end->w6);
1483     uint32_t nvt_idx = xive_get_field32(END_W6_NVT_INDEX, end->w6);
1484     uint8_t priority = xive_get_field32(END_W7_F0_PRIORITY, end->w7);
1485     uint8_t pq;
1486 
1487     if (!xive_end_is_valid(end)) {
1488         return;
1489     }
1490 
1491     pq = xive_get_field32(END_W1_ESn, end->w1);
1492 
1493     g_string_append_printf(buf,
1494                            "  %08x %c%c %c%c%c%c%c%c%c%c prio:%d nvt:%02x/%04x",
1495                            end_idx,
1496                            pq & XIVE_ESB_VAL_P ? 'P' : '-',
1497                            pq & XIVE_ESB_VAL_Q ? 'Q' : '-',
1498                            xive_end_is_valid(end)    ? 'v' : '-',
1499                            xive_end_is_enqueue(end)  ? 'q' : '-',
1500                            xive_end_is_notify(end)   ? 'n' : '-',
1501                            xive_end_is_backlog(end)  ? 'b' : '-',
1502                            xive_end_is_escalate(end) ? 'e' : '-',
1503                            xive_end_is_uncond_escalation(end)   ? 'u' : '-',
1504                            xive_end_is_silent_escalation(end)   ? 's' : '-',
1505                            xive_end_is_firmware(end)   ? 'f' : '-',
1506                            priority, nvt_blk, nvt_idx);
1507 
1508     if (qaddr_base) {
1509         g_string_append_printf(buf, " eq:@%08"PRIx64"% 6d/%5d ^%d",
1510                                qaddr_base, qindex, qentries, qgen);
1511         xive_end_queue_pic_print_info(end, 6, buf);
1512     }
1513     g_string_append_c(buf, '\n');
1514 }
1515 
1516 static void xive_end_enqueue(XiveEND *end, uint32_t data)
1517 {
1518     uint64_t qaddr_base = xive_end_qaddr(end);
1519     uint32_t qsize = xive_get_field32(END_W0_QSIZE, end->w0);
1520     uint32_t qindex = xive_get_field32(END_W1_PAGE_OFF, end->w1);
1521     uint32_t qgen = xive_get_field32(END_W1_GENERATION, end->w1);
1522 
1523     uint64_t qaddr = qaddr_base + (qindex << 2);
1524     uint32_t qdata = cpu_to_be32((qgen << 31) | (data & 0x7fffffff));
1525     uint32_t qentries = 1 << (qsize + 10);
1526 
1527     if (dma_memory_write(&address_space_memory, qaddr,
1528                          &qdata, sizeof(qdata), MEMTXATTRS_UNSPECIFIED)) {
1529         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: failed to write END data @0x%"
1530                       HWADDR_PRIx "\n", qaddr);
1531         return;
1532     }
1533 
1534     qindex = (qindex + 1) & (qentries - 1);
1535     if (qindex == 0) {
1536         qgen ^= 1;
1537         end->w1 = xive_set_field32(END_W1_GENERATION, end->w1, qgen);
1538     }
1539     end->w1 = xive_set_field32(END_W1_PAGE_OFF, end->w1, qindex);
1540 }
1541 
1542 void xive_end_eas_pic_print_info(XiveEND *end, uint32_t end_idx, GString *buf)
1543 {
1544     XiveEAS *eas = (XiveEAS *) &end->w4;
1545     uint8_t pq;
1546 
1547     if (!xive_end_is_escalate(end)) {
1548         return;
1549     }
1550 
1551     pq = xive_get_field32(END_W1_ESe, end->w1);
1552 
1553     g_string_append_printf(buf, "  %08x %c%c %c%c end:%02x/%04x data:%08x\n",
1554                            end_idx,
1555                            pq & XIVE_ESB_VAL_P ? 'P' : '-',
1556                            pq & XIVE_ESB_VAL_Q ? 'Q' : '-',
1557                            xive_eas_is_valid(eas) ? 'V' : ' ',
1558                            xive_eas_is_masked(eas) ? 'M' : ' ',
1559                            (uint8_t)  xive_get_field64(EAS_END_BLOCK, eas->w),
1560                            (uint32_t) xive_get_field64(EAS_END_INDEX, eas->w),
1561                            (uint32_t) xive_get_field64(EAS_END_DATA, eas->w));
1562 }
1563 
1564 /*
1565  * XIVE Router (aka. Virtualization Controller or IVRE)
1566  */
1567 
1568 int xive_router_get_eas(XiveRouter *xrtr, uint8_t eas_blk, uint32_t eas_idx,
1569                         XiveEAS *eas)
1570 {
1571     XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
1572 
1573     return xrc->get_eas(xrtr, eas_blk, eas_idx, eas);
1574 }
1575 
1576 static
1577 int xive_router_get_pq(XiveRouter *xrtr, uint8_t eas_blk, uint32_t eas_idx,
1578                        uint8_t *pq)
1579 {
1580     XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
1581 
1582     return xrc->get_pq(xrtr, eas_blk, eas_idx, pq);
1583 }
1584 
1585 static
1586 int xive_router_set_pq(XiveRouter *xrtr, uint8_t eas_blk, uint32_t eas_idx,
1587                        uint8_t *pq)
1588 {
1589     XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
1590 
1591     return xrc->set_pq(xrtr, eas_blk, eas_idx, pq);
1592 }
1593 
1594 int xive_router_get_end(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx,
1595                         XiveEND *end)
1596 {
1597    XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
1598 
1599    return xrc->get_end(xrtr, end_blk, end_idx, end);
1600 }
1601 
1602 int xive_router_write_end(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx,
1603                           XiveEND *end, uint8_t word_number)
1604 {
1605    XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
1606 
1607    return xrc->write_end(xrtr, end_blk, end_idx, end, word_number);
1608 }
1609 
1610 int xive_router_get_nvt(XiveRouter *xrtr, uint8_t nvt_blk, uint32_t nvt_idx,
1611                         XiveNVT *nvt)
1612 {
1613    XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
1614 
1615    return xrc->get_nvt(xrtr, nvt_blk, nvt_idx, nvt);
1616 }
1617 
1618 int xive_router_write_nvt(XiveRouter *xrtr, uint8_t nvt_blk, uint32_t nvt_idx,
1619                         XiveNVT *nvt, uint8_t word_number)
1620 {
1621    XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
1622 
1623    return xrc->write_nvt(xrtr, nvt_blk, nvt_idx, nvt, word_number);
1624 }
1625 
1626 static int xive_router_get_block_id(XiveRouter *xrtr)
1627 {
1628    XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
1629 
1630    return xrc->get_block_id(xrtr);
1631 }
1632 
1633 static void xive_router_realize(DeviceState *dev, Error **errp)
1634 {
1635     XiveRouter *xrtr = XIVE_ROUTER(dev);
1636 
1637     assert(xrtr->xfb);
1638 }
1639 
1640 static void xive_router_end_notify_handler(XiveRouter *xrtr, XiveEAS *eas)
1641 {
1642     XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
1643 
1644     return xrc->end_notify(xrtr, eas);
1645 }
1646 
1647 /*
1648  * Encode the HW CAM line in the block group mode format :
1649  *
1650  *   chip << 19 | 0000000 0 0001 thread (7Bit)
1651  */
1652 static uint32_t xive_tctx_hw_cam_line(XivePresenter *xptr, XiveTCTX *tctx)
1653 {
1654     CPUPPCState *env = &POWERPC_CPU(tctx->cs)->env;
1655     uint32_t pir = env->spr_cb[SPR_PIR].default_value;
1656     uint8_t blk = xive_router_get_block_id(XIVE_ROUTER(xptr));
1657 
1658     return xive_nvt_cam_line(blk, 1 << 7 | (pir & 0x7f));
1659 }
1660 
1661 /*
1662  * The thread context register words are in big-endian format.
1663  */
1664 int xive_presenter_tctx_match(XivePresenter *xptr, XiveTCTX *tctx,
1665                               uint8_t format,
1666                               uint8_t nvt_blk, uint32_t nvt_idx,
1667                               bool cam_ignore, uint32_t logic_serv)
1668 {
1669     uint32_t cam = xive_nvt_cam_line(nvt_blk, nvt_idx);
1670     uint32_t qw3w2 = xive_tctx_word2(&tctx->regs[TM_QW3_HV_PHYS]);
1671     uint32_t qw2w2 = xive_tctx_word2(&tctx->regs[TM_QW2_HV_POOL]);
1672     uint32_t qw1w2 = xive_tctx_word2(&tctx->regs[TM_QW1_OS]);
1673     uint32_t qw0w2 = xive_tctx_word2(&tctx->regs[TM_QW0_USER]);
1674 
1675     /*
1676      * TODO (PowerNV): ignore mode. The low order bits of the NVT
1677      * identifier are ignored in the "CAM" match.
1678      */
1679 
1680     if (format == 0) {
1681         if (cam_ignore == true) {
1682             /*
1683              * F=0 & i=1: Logical server notification (bits ignored at
1684              * the end of the NVT identifier)
1685              */
1686             qemu_log_mask(LOG_UNIMP, "XIVE: no support for LS NVT %x/%x\n",
1687                           nvt_blk, nvt_idx);
1688              return -1;
1689         }
1690 
1691         /* F=0 & i=0: Specific NVT notification */
1692 
1693         /* PHYS ring */
1694         if ((be32_to_cpu(qw3w2) & TM_QW3W2_VT) &&
1695             cam == xive_tctx_hw_cam_line(xptr, tctx)) {
1696             return TM_QW3_HV_PHYS;
1697         }
1698 
1699         /* HV POOL ring */
1700         if ((be32_to_cpu(qw2w2) & TM_QW2W2_VP) &&
1701             cam == xive_get_field32(TM_QW2W2_POOL_CAM, qw2w2)) {
1702             return TM_QW2_HV_POOL;
1703         }
1704 
1705         /* OS ring */
1706         if ((be32_to_cpu(qw1w2) & TM_QW1W2_VO) &&
1707             cam == xive_get_field32(TM_QW1W2_OS_CAM, qw1w2)) {
1708             return TM_QW1_OS;
1709         }
1710     } else {
1711         /* F=1 : User level Event-Based Branch (EBB) notification */
1712 
1713         /* USER ring */
1714         if  ((be32_to_cpu(qw1w2) & TM_QW1W2_VO) &&
1715              (cam == xive_get_field32(TM_QW1W2_OS_CAM, qw1w2)) &&
1716              (be32_to_cpu(qw0w2) & TM_QW0W2_VU) &&
1717              (logic_serv == xive_get_field32(TM_QW0W2_LOGIC_SERV, qw0w2))) {
1718             return TM_QW0_USER;
1719         }
1720     }
1721     return -1;
1722 }
1723 
1724 /*
1725  * This is our simple Xive Presenter Engine model. It is merged in the
1726  * Router as it does not require an extra object.
1727  *
1728  * It receives notification requests sent by the IVRE to find one
1729  * matching NVT (or more) dispatched on the processor threads. In case
1730  * of a single NVT notification, the process is abbreviated and the
1731  * thread is signaled if a match is found. In case of a logical server
1732  * notification (bits ignored at the end of the NVT identifier), the
1733  * IVPE and IVRE select a winning thread using different filters. This
1734  * involves 2 or 3 exchanges on the PowerBus that the model does not
1735  * support.
1736  *
1737  * The parameters represent what is sent on the PowerBus
1738  */
1739 bool xive_presenter_notify(XiveFabric *xfb, uint8_t format,
1740                            uint8_t nvt_blk, uint32_t nvt_idx,
1741                            bool cam_ignore, uint8_t priority,
1742                            uint32_t logic_serv)
1743 {
1744     XiveFabricClass *xfc = XIVE_FABRIC_GET_CLASS(xfb);
1745     XiveTCTXMatch match = { .tctx = NULL, .ring = 0 };
1746     int count;
1747 
1748     /*
1749      * Ask the machine to scan the interrupt controllers for a match
1750      */
1751     count = xfc->match_nvt(xfb, format, nvt_blk, nvt_idx, cam_ignore,
1752                            priority, logic_serv, &match);
1753     if (count < 0) {
1754         return false;
1755     }
1756 
1757     /* handle CPU exception delivery */
1758     if (count) {
1759         trace_xive_presenter_notify(nvt_blk, nvt_idx, match.ring);
1760         xive_tctx_ipb_update(match.tctx, match.ring,
1761                              xive_priority_to_ipb(priority));
1762     }
1763 
1764     return !!count;
1765 }
1766 
1767 /*
1768  * Notification using the END ESe/ESn bit (Event State Buffer for
1769  * escalation and notification). Provide further coalescing in the
1770  * Router.
1771  */
1772 static bool xive_router_end_es_notify(XiveRouter *xrtr, uint8_t end_blk,
1773                                       uint32_t end_idx, XiveEND *end,
1774                                       uint32_t end_esmask)
1775 {
1776     uint8_t pq = xive_get_field32(end_esmask, end->w1);
1777     bool notify = xive_esb_trigger(&pq);
1778 
1779     if (pq != xive_get_field32(end_esmask, end->w1)) {
1780         end->w1 = xive_set_field32(end_esmask, end->w1, pq);
1781         xive_router_write_end(xrtr, end_blk, end_idx, end, 1);
1782     }
1783 
1784     /* ESe/n[Q]=1 : end of notification */
1785     return notify;
1786 }
1787 
1788 /*
1789  * An END trigger can come from an event trigger (IPI or HW) or from
1790  * another chip. We don't model the PowerBus but the END trigger
1791  * message has the same parameters than in the function below.
1792  */
1793 void xive_router_end_notify(XiveRouter *xrtr, XiveEAS *eas)
1794 {
1795     XiveEND end;
1796     uint8_t priority;
1797     uint8_t format;
1798     uint8_t nvt_blk;
1799     uint32_t nvt_idx;
1800     XiveNVT nvt;
1801     bool found;
1802 
1803     uint8_t end_blk = xive_get_field64(EAS_END_BLOCK, eas->w);
1804     uint32_t end_idx = xive_get_field64(EAS_END_INDEX, eas->w);
1805     uint32_t end_data = xive_get_field64(EAS_END_DATA,  eas->w);
1806 
1807     /* END cache lookup */
1808     if (xive_router_get_end(xrtr, end_blk, end_idx, &end)) {
1809         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: No END %x/%x\n", end_blk,
1810                       end_idx);
1811         return;
1812     }
1813 
1814     if (!xive_end_is_valid(&end)) {
1815         trace_xive_router_end_notify(end_blk, end_idx, end_data);
1816         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: END %x/%x is invalid\n",
1817                       end_blk, end_idx);
1818         return;
1819     }
1820 
1821     if (xive_end_is_enqueue(&end)) {
1822         xive_end_enqueue(&end, end_data);
1823         /* Enqueuing event data modifies the EQ toggle and index */
1824         xive_router_write_end(xrtr, end_blk, end_idx, &end, 1);
1825     }
1826 
1827     /*
1828      * When the END is silent, we skip the notification part.
1829      */
1830     if (xive_end_is_silent_escalation(&end)) {
1831         goto do_escalation;
1832     }
1833 
1834     /*
1835      * The W7 format depends on the F bit in W6. It defines the type
1836      * of the notification :
1837      *
1838      *   F=0 : single or multiple NVT notification
1839      *   F=1 : User level Event-Based Branch (EBB) notification, no
1840      *         priority
1841      */
1842     format = xive_get_field32(END_W6_FORMAT_BIT, end.w6);
1843     priority = xive_get_field32(END_W7_F0_PRIORITY, end.w7);
1844 
1845     /* The END is masked */
1846     if (format == 0 && priority == 0xff) {
1847         return;
1848     }
1849 
1850     /*
1851      * Check the END ESn (Event State Buffer for notification) for
1852      * even further coalescing in the Router
1853      */
1854     if (!xive_end_is_notify(&end)) {
1855         /* ESn[Q]=1 : end of notification */
1856         if (!xive_router_end_es_notify(xrtr, end_blk, end_idx,
1857                                        &end, END_W1_ESn)) {
1858             return;
1859         }
1860     }
1861 
1862     /*
1863      * Follows IVPE notification
1864      */
1865     nvt_blk = xive_get_field32(END_W6_NVT_BLOCK, end.w6);
1866     nvt_idx = xive_get_field32(END_W6_NVT_INDEX, end.w6);
1867 
1868     /* NVT cache lookup */
1869     if (xive_router_get_nvt(xrtr, nvt_blk, nvt_idx, &nvt)) {
1870         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: no NVT %x/%x\n",
1871                       nvt_blk, nvt_idx);
1872         return;
1873     }
1874 
1875     if (!xive_nvt_is_valid(&nvt)) {
1876         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: NVT %x/%x is invalid\n",
1877                       nvt_blk, nvt_idx);
1878         return;
1879     }
1880 
1881     found = xive_presenter_notify(xrtr->xfb, format, nvt_blk, nvt_idx,
1882                           xive_get_field32(END_W7_F0_IGNORE, end.w7),
1883                           priority,
1884                           xive_get_field32(END_W7_F1_LOG_SERVER_ID, end.w7));
1885 
1886     /* TODO: Auto EOI. */
1887 
1888     if (found) {
1889         return;
1890     }
1891 
1892     /*
1893      * If no matching NVT is dispatched on a HW thread :
1894      * - specific VP: update the NVT structure if backlog is activated
1895      * - logical server : forward request to IVPE (not supported)
1896      */
1897     if (xive_end_is_backlog(&end)) {
1898         uint8_t ipb;
1899 
1900         if (format == 1) {
1901             qemu_log_mask(LOG_GUEST_ERROR,
1902                           "XIVE: END %x/%x invalid config: F1 & backlog\n",
1903                           end_blk, end_idx);
1904             return;
1905         }
1906         /*
1907          * Record the IPB in the associated NVT structure for later
1908          * use. The presenter will resend the interrupt when the vCPU
1909          * is dispatched again on a HW thread.
1910          */
1911         ipb = xive_get_field32(NVT_W4_IPB, nvt.w4) |
1912             xive_priority_to_ipb(priority);
1913         nvt.w4 = xive_set_field32(NVT_W4_IPB, nvt.w4, ipb);
1914         xive_router_write_nvt(xrtr, nvt_blk, nvt_idx, &nvt, 4);
1915 
1916         /*
1917          * On HW, follows a "Broadcast Backlog" to IVPEs
1918          */
1919     }
1920 
1921 do_escalation:
1922     /*
1923      * If activated, escalate notification using the ESe PQ bits and
1924      * the EAS in w4-5
1925      */
1926     if (!xive_end_is_escalate(&end)) {
1927         return;
1928     }
1929 
1930     /*
1931      * Check the END ESe (Event State Buffer for escalation) for even
1932      * further coalescing in the Router
1933      */
1934     if (!xive_end_is_uncond_escalation(&end)) {
1935         /* ESe[Q]=1 : end of notification */
1936         if (!xive_router_end_es_notify(xrtr, end_blk, end_idx,
1937                                        &end, END_W1_ESe)) {
1938             return;
1939         }
1940     }
1941 
1942     trace_xive_router_end_escalate(end_blk, end_idx,
1943            (uint8_t) xive_get_field32(END_W4_ESC_END_BLOCK, end.w4),
1944            (uint32_t) xive_get_field32(END_W4_ESC_END_INDEX, end.w4),
1945            (uint32_t) xive_get_field32(END_W5_ESC_END_DATA,  end.w5));
1946     /*
1947      * The END trigger becomes an Escalation trigger
1948      */
1949     xive_router_end_notify_handler(xrtr, (XiveEAS *) &end.w4);
1950 }
1951 
1952 void xive_router_notify(XiveNotifier *xn, uint32_t lisn, bool pq_checked)
1953 {
1954     XiveRouter *xrtr = XIVE_ROUTER(xn);
1955     uint8_t eas_blk = XIVE_EAS_BLOCK(lisn);
1956     uint32_t eas_idx = XIVE_EAS_INDEX(lisn);
1957     XiveEAS eas;
1958 
1959     /* EAS cache lookup */
1960     if (xive_router_get_eas(xrtr, eas_blk, eas_idx, &eas)) {
1961         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Unknown LISN %x\n", lisn);
1962         return;
1963     }
1964 
1965     if (!pq_checked) {
1966         bool notify;
1967         uint8_t pq;
1968 
1969         /* PQ cache lookup */
1970         if (xive_router_get_pq(xrtr, eas_blk, eas_idx, &pq)) {
1971             /* Set FIR */
1972             g_assert_not_reached();
1973         }
1974 
1975         notify = xive_esb_trigger(&pq);
1976 
1977         if (xive_router_set_pq(xrtr, eas_blk, eas_idx, &pq)) {
1978             /* Set FIR */
1979             g_assert_not_reached();
1980         }
1981 
1982         if (!notify) {
1983             return;
1984         }
1985     }
1986 
1987     if (!xive_eas_is_valid(&eas)) {
1988         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid LISN %x\n", lisn);
1989         return;
1990     }
1991 
1992     if (xive_eas_is_masked(&eas)) {
1993         /* Notification completed */
1994         return;
1995     }
1996 
1997     /*
1998      * The event trigger becomes an END trigger
1999      */
2000     xive_router_end_notify_handler(xrtr, &eas);
2001 }
2002 
2003 static const Property xive_router_properties[] = {
2004     DEFINE_PROP_LINK("xive-fabric", XiveRouter, xfb,
2005                      TYPE_XIVE_FABRIC, XiveFabric *),
2006 };
2007 
2008 static void xive_router_class_init(ObjectClass *klass, void *data)
2009 {
2010     DeviceClass *dc = DEVICE_CLASS(klass);
2011     XiveNotifierClass *xnc = XIVE_NOTIFIER_CLASS(klass);
2012     XiveRouterClass *xrc = XIVE_ROUTER_CLASS(klass);
2013 
2014     dc->desc    = "XIVE Router Engine";
2015     device_class_set_props(dc, xive_router_properties);
2016     /* Parent is SysBusDeviceClass. No need to call its realize hook */
2017     dc->realize = xive_router_realize;
2018     xnc->notify = xive_router_notify;
2019 
2020     /* By default, the router handles END triggers locally */
2021     xrc->end_notify = xive_router_end_notify;
2022 }
2023 
2024 static const TypeInfo xive_router_info = {
2025     .name          = TYPE_XIVE_ROUTER,
2026     .parent        = TYPE_SYS_BUS_DEVICE,
2027     .abstract      = true,
2028     .instance_size = sizeof(XiveRouter),
2029     .class_size    = sizeof(XiveRouterClass),
2030     .class_init    = xive_router_class_init,
2031     .interfaces    = (InterfaceInfo[]) {
2032         { TYPE_XIVE_NOTIFIER },
2033         { TYPE_XIVE_PRESENTER },
2034         { }
2035     }
2036 };
2037 
2038 void xive_eas_pic_print_info(XiveEAS *eas, uint32_t lisn, GString *buf)
2039 {
2040     if (!xive_eas_is_valid(eas)) {
2041         return;
2042     }
2043 
2044     g_string_append_printf(buf, "  %08x %s end:%02x/%04x data:%08x\n",
2045                            lisn, xive_eas_is_masked(eas) ? "M" : " ",
2046                            (uint8_t)  xive_get_field64(EAS_END_BLOCK, eas->w),
2047                            (uint32_t) xive_get_field64(EAS_END_INDEX, eas->w),
2048                            (uint32_t) xive_get_field64(EAS_END_DATA, eas->w));
2049 }
2050 
2051 /*
2052  * END ESB MMIO loads
2053  */
2054 static uint64_t xive_end_source_read(void *opaque, hwaddr addr, unsigned size)
2055 {
2056     XiveENDSource *xsrc = XIVE_END_SOURCE(opaque);
2057     uint32_t offset = addr & 0xFFF;
2058     uint8_t end_blk;
2059     uint32_t end_idx;
2060     XiveEND end;
2061     uint32_t end_esmask;
2062     uint8_t pq;
2063     uint64_t ret = -1;
2064 
2065     /*
2066      * The block id should be deduced from the load address on the END
2067      * ESB MMIO but our model only supports a single block per XIVE chip.
2068      */
2069     end_blk = xive_router_get_block_id(xsrc->xrtr);
2070     end_idx = addr >> (xsrc->esb_shift + 1);
2071 
2072     trace_xive_end_source_read(end_blk, end_idx, addr);
2073 
2074     if (xive_router_get_end(xsrc->xrtr, end_blk, end_idx, &end)) {
2075         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: No END %x/%x\n", end_blk,
2076                       end_idx);
2077         return -1;
2078     }
2079 
2080     if (!xive_end_is_valid(&end)) {
2081         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: END %x/%x is invalid\n",
2082                       end_blk, end_idx);
2083         return -1;
2084     }
2085 
2086     end_esmask = addr_is_even(addr, xsrc->esb_shift) ? END_W1_ESn : END_W1_ESe;
2087     pq = xive_get_field32(end_esmask, end.w1);
2088 
2089     switch (offset) {
2090     case XIVE_ESB_LOAD_EOI ... XIVE_ESB_LOAD_EOI + 0x7FF:
2091         ret = xive_esb_eoi(&pq);
2092 
2093         /* Forward the source event notification for routing ?? */
2094         break;
2095 
2096     case XIVE_ESB_GET ... XIVE_ESB_GET + 0x3FF:
2097         ret = pq;
2098         break;
2099 
2100     case XIVE_ESB_SET_PQ_00 ... XIVE_ESB_SET_PQ_00 + 0x0FF:
2101     case XIVE_ESB_SET_PQ_01 ... XIVE_ESB_SET_PQ_01 + 0x0FF:
2102     case XIVE_ESB_SET_PQ_10 ... XIVE_ESB_SET_PQ_10 + 0x0FF:
2103     case XIVE_ESB_SET_PQ_11 ... XIVE_ESB_SET_PQ_11 + 0x0FF:
2104         ret = xive_esb_set(&pq, (offset >> 8) & 0x3);
2105         break;
2106     default:
2107         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid END ESB load addr %d\n",
2108                       offset);
2109         return -1;
2110     }
2111 
2112     if (pq != xive_get_field32(end_esmask, end.w1)) {
2113         end.w1 = xive_set_field32(end_esmask, end.w1, pq);
2114         xive_router_write_end(xsrc->xrtr, end_blk, end_idx, &end, 1);
2115     }
2116 
2117     return ret;
2118 }
2119 
2120 /*
2121  * END ESB MMIO stores are invalid
2122  */
2123 static void xive_end_source_write(void *opaque, hwaddr addr,
2124                                   uint64_t value, unsigned size)
2125 {
2126     qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid ESB write addr 0x%"
2127                   HWADDR_PRIx"\n", addr);
2128 }
2129 
2130 static const MemoryRegionOps xive_end_source_ops = {
2131     .read = xive_end_source_read,
2132     .write = xive_end_source_write,
2133     .endianness = DEVICE_BIG_ENDIAN,
2134     .valid = {
2135         .min_access_size = 1,
2136         .max_access_size = 8,
2137     },
2138     .impl = {
2139         .min_access_size = 1,
2140         .max_access_size = 8,
2141     },
2142 };
2143 
2144 static void xive_end_source_realize(DeviceState *dev, Error **errp)
2145 {
2146     XiveENDSource *xsrc = XIVE_END_SOURCE(dev);
2147 
2148     assert(xsrc->xrtr);
2149 
2150     if (!xsrc->nr_ends) {
2151         error_setg(errp, "Number of interrupt needs to be greater than 0");
2152         return;
2153     }
2154 
2155     if (xsrc->esb_shift != XIVE_ESB_4K &&
2156         xsrc->esb_shift != XIVE_ESB_64K) {
2157         error_setg(errp, "Invalid ESB shift setting");
2158         return;
2159     }
2160 
2161     /*
2162      * Each END is assigned an even/odd pair of MMIO pages, the even page
2163      * manages the ESn field while the odd page manages the ESe field.
2164      */
2165     memory_region_init_io(&xsrc->esb_mmio, OBJECT(xsrc),
2166                           &xive_end_source_ops, xsrc, "xive.end",
2167                           (1ull << (xsrc->esb_shift + 1)) * xsrc->nr_ends);
2168 }
2169 
2170 static const Property xive_end_source_properties[] = {
2171     DEFINE_PROP_UINT32("nr-ends", XiveENDSource, nr_ends, 0),
2172     DEFINE_PROP_UINT32("shift", XiveENDSource, esb_shift, XIVE_ESB_64K),
2173     DEFINE_PROP_LINK("xive", XiveENDSource, xrtr, TYPE_XIVE_ROUTER,
2174                      XiveRouter *),
2175 };
2176 
2177 static void xive_end_source_class_init(ObjectClass *klass, void *data)
2178 {
2179     DeviceClass *dc = DEVICE_CLASS(klass);
2180 
2181     dc->desc    = "XIVE END Source";
2182     device_class_set_props(dc, xive_end_source_properties);
2183     dc->realize = xive_end_source_realize;
2184     /*
2185      * Reason: part of XIVE interrupt controller, needs to be wired up,
2186      * e.g. by spapr_xive_instance_init().
2187      */
2188     dc->user_creatable = false;
2189 }
2190 
2191 static const TypeInfo xive_end_source_info = {
2192     .name          = TYPE_XIVE_END_SOURCE,
2193     .parent        = TYPE_DEVICE,
2194     .instance_size = sizeof(XiveENDSource),
2195     .class_init    = xive_end_source_class_init,
2196 };
2197 
2198 /*
2199  * XIVE Notifier
2200  */
2201 static const TypeInfo xive_notifier_info = {
2202     .name = TYPE_XIVE_NOTIFIER,
2203     .parent = TYPE_INTERFACE,
2204     .class_size = sizeof(XiveNotifierClass),
2205 };
2206 
2207 /*
2208  * XIVE Presenter
2209  */
2210 static const TypeInfo xive_presenter_info = {
2211     .name = TYPE_XIVE_PRESENTER,
2212     .parent = TYPE_INTERFACE,
2213     .class_size = sizeof(XivePresenterClass),
2214 };
2215 
2216 /*
2217  * XIVE Fabric
2218  */
2219 static const TypeInfo xive_fabric_info = {
2220     .name = TYPE_XIVE_FABRIC,
2221     .parent = TYPE_INTERFACE,
2222     .class_size = sizeof(XiveFabricClass),
2223 };
2224 
2225 static void xive_register_types(void)
2226 {
2227     type_register_static(&xive_fabric_info);
2228     type_register_static(&xive_source_info);
2229     type_register_static(&xive_notifier_info);
2230     type_register_static(&xive_presenter_info);
2231     type_register_static(&xive_router_info);
2232     type_register_static(&xive_end_source_info);
2233     type_register_static(&xive_tctx_info);
2234 }
2235 
2236 type_init(xive_register_types)
2237