xref: /qemu/hw/ppc/spapr_irq.c (revision 98a39a7927b510fcdd29f8237b67368a66121c84)
1 /*
2  * QEMU PowerPC sPAPR IRQ interface
3  *
4  * Copyright (c) 2018, IBM Corporation.
5  *
6  * This code is licensed under the GPL version 2 or later. See the
7  * COPYING file in the top-level directory.
8  */
9 
10 #include "qemu/osdep.h"
11 #include "qemu/log.h"
12 #include "qemu/error-report.h"
13 #include "qapi/error.h"
14 #include "hw/irq.h"
15 #include "hw/ppc/spapr.h"
16 #include "hw/ppc/spapr_cpu_core.h"
17 #include "hw/ppc/spapr_xive.h"
18 #include "hw/ppc/xics.h"
19 #include "hw/ppc/xics_spapr.h"
20 #include "hw/qdev-properties.h"
21 #include "cpu-models.h"
22 #include "sysemu/kvm.h"
23 
24 #include "trace.h"
25 
26 static const TypeInfo spapr_intc_info = {
27     .name = TYPE_SPAPR_INTC,
28     .parent = TYPE_INTERFACE,
29     .class_size = sizeof(SpaprInterruptControllerClass),
30 };
31 
32 void spapr_irq_msi_init(SpaprMachineState *spapr, uint32_t nr_msis)
33 {
34     spapr->irq_map_nr = nr_msis;
35     spapr->irq_map = bitmap_new(spapr->irq_map_nr);
36 }
37 
38 int spapr_irq_msi_alloc(SpaprMachineState *spapr, uint32_t num, bool align,
39                         Error **errp)
40 {
41     int irq;
42 
43     /*
44      * The 'align_mask' parameter of bitmap_find_next_zero_area()
45      * should be one less than a power of 2; 0 means no
46      * alignment. Adapt the 'align' value of the former allocator
47      * to fit the requirements of bitmap_find_next_zero_area()
48      */
49     align -= 1;
50 
51     irq = bitmap_find_next_zero_area(spapr->irq_map, spapr->irq_map_nr, 0, num,
52                                      align);
53     if (irq == spapr->irq_map_nr) {
54         error_setg(errp, "can't find a free %d-IRQ block", num);
55         return -1;
56     }
57 
58     bitmap_set(spapr->irq_map, irq, num);
59 
60     return irq + SPAPR_IRQ_MSI;
61 }
62 
63 void spapr_irq_msi_free(SpaprMachineState *spapr, int irq, uint32_t num)
64 {
65     bitmap_clear(spapr->irq_map, irq - SPAPR_IRQ_MSI, num);
66 }
67 
68 static void spapr_irq_init_kvm(SpaprMachineState *spapr,
69                                   SpaprIrq *irq, Error **errp)
70 {
71     MachineState *machine = MACHINE(spapr);
72     Error *local_err = NULL;
73 
74     if (kvm_enabled() && machine_kernel_irqchip_allowed(machine)) {
75         irq->init_kvm(spapr, &local_err);
76         if (local_err && machine_kernel_irqchip_required(machine)) {
77             error_prepend(&local_err,
78                           "kernel_irqchip requested but unavailable: ");
79             error_propagate(errp, local_err);
80             return;
81         }
82 
83         if (!local_err) {
84             return;
85         }
86 
87         /*
88          * We failed to initialize the KVM device, fallback to
89          * emulated mode
90          */
91         error_prepend(&local_err, "kernel_irqchip allowed but unavailable: ");
92         error_append_hint(&local_err, "Falling back to kernel-irqchip=off\n");
93         warn_report_err(local_err);
94     }
95 }
96 
97 /*
98  * XICS IRQ backend.
99  */
100 
101 static int spapr_irq_post_load_xics(SpaprMachineState *spapr, int version_id)
102 {
103     if (!kvm_irqchip_in_kernel()) {
104         CPUState *cs;
105         CPU_FOREACH(cs) {
106             PowerPCCPU *cpu = POWERPC_CPU(cs);
107             icp_resend(spapr_cpu_state(cpu)->icp);
108         }
109     }
110     return 0;
111 }
112 
113 static void spapr_irq_reset_xics(SpaprMachineState *spapr, Error **errp)
114 {
115     Error *local_err = NULL;
116 
117     spapr_irq_init_kvm(spapr, &spapr_irq_xics, &local_err);
118     if (local_err) {
119         error_propagate(errp, local_err);
120         return;
121     }
122 }
123 
124 static void spapr_irq_init_kvm_xics(SpaprMachineState *spapr, Error **errp)
125 {
126     if (kvm_enabled()) {
127         xics_kvm_connect(SPAPR_INTC(spapr->ics), errp);
128     }
129 }
130 
131 SpaprIrq spapr_irq_xics = {
132     .nr_xirqs    = SPAPR_NR_XIRQS,
133     .nr_msis     = SPAPR_NR_MSIS,
134     .xics        = true,
135     .xive        = false,
136 
137     .post_load   = spapr_irq_post_load_xics,
138     .reset       = spapr_irq_reset_xics,
139     .init_kvm    = spapr_irq_init_kvm_xics,
140 };
141 
142 /*
143  * XIVE IRQ backend.
144  */
145 
146 static int spapr_irq_post_load_xive(SpaprMachineState *spapr, int version_id)
147 {
148     return spapr_xive_post_load(spapr->xive, version_id);
149 }
150 
151 static void spapr_irq_reset_xive(SpaprMachineState *spapr, Error **errp)
152 {
153     CPUState *cs;
154     Error *local_err = NULL;
155 
156     CPU_FOREACH(cs) {
157         PowerPCCPU *cpu = POWERPC_CPU(cs);
158 
159         /* (TCG) Set the OS CAM line of the thread interrupt context. */
160         spapr_xive_set_tctx_os_cam(spapr_cpu_state(cpu)->tctx);
161     }
162 
163     spapr_irq_init_kvm(spapr, &spapr_irq_xive, &local_err);
164     if (local_err) {
165         error_propagate(errp, local_err);
166         return;
167     }
168 
169     /* Activate the XIVE MMIOs */
170     spapr_xive_mmio_set_enabled(spapr->xive, true);
171 }
172 
173 static void spapr_irq_init_kvm_xive(SpaprMachineState *spapr, Error **errp)
174 {
175     if (kvm_enabled()) {
176         kvmppc_xive_connect(SPAPR_INTC(spapr->xive), errp);
177     }
178 }
179 
180 SpaprIrq spapr_irq_xive = {
181     .nr_xirqs    = SPAPR_NR_XIRQS,
182     .nr_msis     = SPAPR_NR_MSIS,
183     .xics        = false,
184     .xive        = true,
185 
186     .post_load   = spapr_irq_post_load_xive,
187     .reset       = spapr_irq_reset_xive,
188     .init_kvm    = spapr_irq_init_kvm_xive,
189 };
190 
/*
 * Dual XIVE and XICS IRQ backend.
 *
 * Objects for both interrupt modes, XIVE and XICS, are created but
 * the machine starts in legacy interrupt mode (XICS). The mode can be
 * changed by the CAS negotiation process and, in that case, the new
 * mode is activated after an extra machine reset.
 */
199 
200 /*
201  * Returns the sPAPR IRQ backend negotiated by CAS. XICS is the
202  * default.
203  */
204 static SpaprIrq *spapr_irq_current(SpaprMachineState *spapr)
205 {
206     return spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT) ?
207         &spapr_irq_xive : &spapr_irq_xics;
208 }
209 
210 static int spapr_irq_post_load_dual(SpaprMachineState *spapr, int version_id)
211 {
212     /*
213      * Force a reset of the XIVE backend after migration. The machine
214      * defaults to XICS at startup.
215      */
216     if (spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) {
217         if (kvm_irqchip_in_kernel()) {
218             xics_kvm_disconnect(SPAPR_INTC(spapr->ics));
219         }
220         spapr_irq_xive.reset(spapr, &error_fatal);
221     }
222 
223     return spapr_irq_current(spapr)->post_load(spapr, version_id);
224 }
225 
226 static void spapr_irq_reset_dual(SpaprMachineState *spapr, Error **errp)
227 {
228     /*
229      * Deactivate the XIVE MMIOs. The XIVE backend will reenable them
230      * if selected.
231      */
232     spapr_xive_mmio_set_enabled(spapr->xive, false);
233 
234     /* Destroy all KVM devices */
235     if (kvm_irqchip_in_kernel()) {
236         xics_kvm_disconnect(SPAPR_INTC(spapr->ics));
237         kvmppc_xive_disconnect(SPAPR_INTC(spapr->xive));
238     }
239 
240     spapr_irq_current(spapr)->reset(spapr, errp);
241 }
242 
243 /*
244  * Define values in sync with the XIVE and XICS backend
245  */
246 SpaprIrq spapr_irq_dual = {
247     .nr_xirqs    = SPAPR_NR_XIRQS,
248     .nr_msis     = SPAPR_NR_MSIS,
249     .xics        = true,
250     .xive        = true,
251 
252     .post_load   = spapr_irq_post_load_dual,
253     .reset       = spapr_irq_reset_dual,
254     .init_kvm    = NULL, /* should not be used */
255 };
256 
257 
258 static int spapr_irq_check(SpaprMachineState *spapr, Error **errp)
259 {
260     MachineState *machine = MACHINE(spapr);
261 
262     /*
263      * Sanity checks on non-P9 machines. On these, XIVE is not
264      * advertised, see spapr_dt_ov5_platform_support()
265      */
266     if (!ppc_type_check_compat(machine->cpu_type, CPU_POWERPC_LOGICAL_3_00,
267                                0, spapr->max_compat_pvr)) {
268         /*
269          * If the 'dual' interrupt mode is selected, force XICS as CAS
270          * negotiation is useless.
271          */
272         if (spapr->irq == &spapr_irq_dual) {
273             spapr->irq = &spapr_irq_xics;
274             return 0;
275         }
276 
277         /*
278          * Non-P9 machines using only XIVE is a bogus setup. We have two
279          * scenarios to take into account because of the compat mode:
280          *
281          * 1. POWER7/8 machines should fail to init later on when creating
282          *    the XIVE interrupt presenters because a POWER9 exception
283          *    model is required.
284 
285          * 2. POWER9 machines using the POWER8 compat mode won't fail and
286          *    will let the OS boot with a partial XIVE setup : DT
287          *    properties but no hcalls.
288          *
289          * To cover both and not confuse the OS, add an early failure in
290          * QEMU.
291          */
292         if (spapr->irq == &spapr_irq_xive) {
293             error_setg(errp, "XIVE-only machines require a POWER9 CPU");
294             return -1;
295         }
296     }
297 
298     /*
299      * On a POWER9 host, some older KVM XICS devices cannot be destroyed and
300      * re-created. Detect that early to avoid QEMU to exit later when the
301      * guest reboots.
302      */
303     if (kvm_enabled() &&
304         spapr->irq == &spapr_irq_dual &&
305         machine_kernel_irqchip_required(machine) &&
306         xics_kvm_has_broken_disconnect(spapr)) {
307         error_setg(errp, "KVM is too old to support ic-mode=dual,kernel-irqchip=on");
308         return -1;
309     }
310 
311     return 0;
312 }
313 
314 /*
315  * sPAPR IRQ frontend routines for devices
316  */
/*
 * Array initializer listing the interrupt controllers of a machine:
 * the XICS source first, the XIVE device second. Users of the macro
 * skip the entries that are NULL.
 */
#define ALL_INTCS(spapr_)               \
    {                                   \
        SPAPR_INTC((spapr_)->ics),      \
        SPAPR_INTC((spapr_)->xive),     \
    }
319 
320 int spapr_irq_cpu_intc_create(SpaprMachineState *spapr,
321                               PowerPCCPU *cpu, Error **errp)
322 {
323     SpaprInterruptController *intcs[] = ALL_INTCS(spapr);
324     int i;
325     int rc;
326 
327     for (i = 0; i < ARRAY_SIZE(intcs); i++) {
328         SpaprInterruptController *intc = intcs[i];
329         if (intc) {
330             SpaprInterruptControllerClass *sicc = SPAPR_INTC_GET_CLASS(intc);
331             rc = sicc->cpu_intc_create(intc, cpu, errp);
332             if (rc < 0) {
333                 return rc;
334             }
335         }
336     }
337 
338     return 0;
339 }
340 
341 static void spapr_set_irq(void *opaque, int irq, int level)
342 {
343     SpaprMachineState *spapr = SPAPR_MACHINE(opaque);
344     SpaprInterruptControllerClass *sicc
345         = SPAPR_INTC_GET_CLASS(spapr->active_intc);
346 
347     sicc->set_irq(spapr->active_intc, irq, level);
348 }
349 
350 void spapr_irq_print_info(SpaprMachineState *spapr, Monitor *mon)
351 {
352     SpaprInterruptControllerClass *sicc
353         = SPAPR_INTC_GET_CLASS(spapr->active_intc);
354 
355     sicc->print_info(spapr->active_intc, mon);
356 }
357 
358 void spapr_irq_dt(SpaprMachineState *spapr, uint32_t nr_servers,
359                   void *fdt, uint32_t phandle)
360 {
361     SpaprInterruptControllerClass *sicc
362         = SPAPR_INTC_GET_CLASS(spapr->active_intc);
363 
364     sicc->dt(spapr->active_intc, nr_servers, fdt, phandle);
365 }
366 
367 void spapr_irq_init(SpaprMachineState *spapr, Error **errp)
368 {
369     MachineState *machine = MACHINE(spapr);
370 
371     if (machine_kernel_irqchip_split(machine)) {
372         error_setg(errp, "kernel_irqchip split mode not supported on pseries");
373         return;
374     }
375 
376     if (!kvm_enabled() && machine_kernel_irqchip_required(machine)) {
377         error_setg(errp,
378                    "kernel_irqchip requested but only available with KVM");
379         return;
380     }
381 
382     if (spapr_irq_check(spapr, errp) < 0) {
383         return;
384     }
385 
386     /* Initialize the MSI IRQ allocator. */
387     if (!SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) {
388         spapr_irq_msi_init(spapr, spapr->irq->nr_msis);
389     }
390 
391     if (spapr->irq->xics) {
392         Error *local_err = NULL;
393         Object *obj;
394 
395         obj = object_new(TYPE_ICS_SPAPR);
396         object_property_add_child(OBJECT(spapr), "ics", obj, &local_err);
397         if (local_err) {
398             error_propagate(errp, local_err);
399             return;
400         }
401 
402         object_property_add_const_link(obj, ICS_PROP_XICS, OBJECT(spapr),
403                                        &local_err);
404         if (local_err) {
405             error_propagate(errp, local_err);
406             return;
407         }
408 
409         object_property_set_int(obj, spapr->irq->nr_xirqs, "nr-irqs",
410                                 &local_err);
411         if (local_err) {
412             error_propagate(errp, local_err);
413             return;
414         }
415 
416         object_property_set_bool(obj, true, "realized", &local_err);
417         if (local_err) {
418             error_propagate(errp, local_err);
419             return;
420         }
421 
422         spapr->ics = ICS_SPAPR(obj);
423     }
424 
425     if (spapr->irq->xive) {
426         uint32_t nr_servers = spapr_max_server_number(spapr);
427         DeviceState *dev;
428         int i;
429 
430         dev = qdev_create(NULL, TYPE_SPAPR_XIVE);
431         qdev_prop_set_uint32(dev, "nr-irqs",
432                              spapr->irq->nr_xirqs + SPAPR_XIRQ_BASE);
433         /*
434          * 8 XIVE END structures per CPU. One for each available
435          * priority
436          */
437         qdev_prop_set_uint32(dev, "nr-ends", nr_servers << 3);
438         qdev_init_nofail(dev);
439 
440         spapr->xive = SPAPR_XIVE(dev);
441 
442         /* Enable the CPU IPIs */
443         for (i = 0; i < nr_servers; ++i) {
444             SpaprInterruptControllerClass *sicc
445                 = SPAPR_INTC_GET_CLASS(spapr->xive);
446 
447             if (sicc->claim_irq(SPAPR_INTC(spapr->xive), SPAPR_IRQ_IPI + i,
448                                 false, errp) < 0) {
449                 return;
450             }
451         }
452 
453         spapr_xive_hcall_init(spapr);
454     }
455 
456     spapr->qirqs = qemu_allocate_irqs(spapr_set_irq, spapr,
457                                       spapr->irq->nr_xirqs + SPAPR_XIRQ_BASE);
458 }
459 
460 int spapr_irq_claim(SpaprMachineState *spapr, int irq, bool lsi, Error **errp)
461 {
462     SpaprInterruptController *intcs[] = ALL_INTCS(spapr);
463     int i;
464     int rc;
465 
466     assert(irq >= SPAPR_XIRQ_BASE);
467     assert(irq < (spapr->irq->nr_xirqs + SPAPR_XIRQ_BASE));
468 
469     for (i = 0; i < ARRAY_SIZE(intcs); i++) {
470         SpaprInterruptController *intc = intcs[i];
471         if (intc) {
472             SpaprInterruptControllerClass *sicc = SPAPR_INTC_GET_CLASS(intc);
473             rc = sicc->claim_irq(intc, irq, lsi, errp);
474             if (rc < 0) {
475                 return rc;
476             }
477         }
478     }
479 
480     return 0;
481 }
482 
483 void spapr_irq_free(SpaprMachineState *spapr, int irq, int num)
484 {
485     SpaprInterruptController *intcs[] = ALL_INTCS(spapr);
486     int i, j;
487 
488     assert(irq >= SPAPR_XIRQ_BASE);
489     assert((irq + num) <= (spapr->irq->nr_xirqs + SPAPR_XIRQ_BASE));
490 
491     for (i = irq; i < (irq + num); i++) {
492         for (j = 0; j < ARRAY_SIZE(intcs); j++) {
493             SpaprInterruptController *intc = intcs[j];
494 
495             if (intc) {
496                 SpaprInterruptControllerClass *sicc
497                     = SPAPR_INTC_GET_CLASS(intc);
498                 sicc->free_irq(intc, i);
499             }
500         }
501     }
502 }
503 
504 qemu_irq spapr_qirq(SpaprMachineState *spapr, int irq)
505 {
506     /*
507      * This interface is basically for VIO and PHB devices to find the
508      * right qemu_irq to manipulate, so we only allow access to the
509      * external irqs for now.  Currently anything which needs to
510      * access the IPIs most naturally gets there via the guest side
511      * interfaces, we can change this if we need to in future.
512      */
513     assert(irq >= SPAPR_XIRQ_BASE);
514     assert(irq < (spapr->irq->nr_xirqs + SPAPR_XIRQ_BASE));
515 
516     if (spapr->ics) {
517         assert(ics_valid_irq(spapr->ics, irq));
518     }
519     if (spapr->xive) {
520         assert(irq < spapr->xive->nr_irqs);
521         assert(xive_eas_is_valid(&spapr->xive->eat[irq]));
522     }
523 
524     return spapr->qirqs[irq];
525 }
526 
527 int spapr_irq_post_load(SpaprMachineState *spapr, int version_id)
528 {
529     spapr_irq_update_active_intc(spapr);
530     return spapr->irq->post_load(spapr, version_id);
531 }
532 
533 void spapr_irq_reset(SpaprMachineState *spapr, Error **errp)
534 {
535     assert(!spapr->irq_map || bitmap_empty(spapr->irq_map, spapr->irq_map_nr));
536 
537     spapr_irq_update_active_intc(spapr);
538 
539     if (spapr->irq->reset) {
540         spapr->irq->reset(spapr, errp);
541     }
542 }
543 
544 int spapr_irq_get_phandle(SpaprMachineState *spapr, void *fdt, Error **errp)
545 {
546     const char *nodename = "interrupt-controller";
547     int offset, phandle;
548 
549     offset = fdt_subnode_offset(fdt, 0, nodename);
550     if (offset < 0) {
551         error_setg(errp, "Can't find node \"%s\": %s",
552                    nodename, fdt_strerror(offset));
553         return -1;
554     }
555 
556     phandle = fdt_get_phandle(fdt, offset);
557     if (!phandle) {
558         error_setg(errp, "Can't get phandle of node \"%s\"", nodename);
559         return -1;
560     }
561 
562     return phandle;
563 }
564 
565 static void set_active_intc(SpaprMachineState *spapr,
566                             SpaprInterruptController *new_intc)
567 {
568     SpaprInterruptControllerClass *sicc;
569 
570     assert(new_intc);
571 
572     if (new_intc == spapr->active_intc) {
573         /* Nothing to do */
574         return;
575     }
576 
577     if (spapr->active_intc) {
578         sicc = SPAPR_INTC_GET_CLASS(spapr->active_intc);
579         if (sicc->deactivate) {
580             sicc->deactivate(spapr->active_intc);
581         }
582     }
583 
584     sicc = SPAPR_INTC_GET_CLASS(new_intc);
585     if (sicc->activate) {
586         sicc->activate(new_intc, &error_fatal);
587     }
588 
589     spapr->active_intc = new_intc;
590 }
591 
592 void spapr_irq_update_active_intc(SpaprMachineState *spapr)
593 {
594     SpaprInterruptController *new_intc;
595 
596     if (!spapr->ics) {
597         /*
598          * XXX before we run CAS, ov5_cas is initialized empty, which
599          * indicates XICS, even if we have ic-mode=xive.  TODO: clean
600          * up the CAS path so that we have a clearer way of handling
601          * this.
602          */
603         new_intc = SPAPR_INTC(spapr->xive);
604     } else if (spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) {
605         new_intc = SPAPR_INTC(spapr->xive);
606     } else {
607         new_intc = SPAPR_INTC(spapr->ics);
608     }
609 
610     set_active_intc(spapr, new_intc);
611 }
612 
613 /*
614  * XICS legacy routines - to deprecate one day
615  */
616 
617 static int ics_find_free_block(ICSState *ics, int num, int alignnum)
618 {
619     int first, i;
620 
621     for (first = 0; first < ics->nr_irqs; first += alignnum) {
622         if (num > (ics->nr_irqs - first)) {
623             return -1;
624         }
625         for (i = first; i < first + num; ++i) {
626             if (!ics_irq_free(ics, i)) {
627                 break;
628             }
629         }
630         if (i == (first + num)) {
631             return first;
632         }
633     }
634 
635     return -1;
636 }
637 
638 int spapr_irq_find(SpaprMachineState *spapr, int num, bool align, Error **errp)
639 {
640     ICSState *ics = spapr->ics;
641     int first = -1;
642 
643     assert(ics);
644 
645     /*
646      * MSIMesage::data is used for storing VIRQ so
647      * it has to be aligned to num to support multiple
648      * MSI vectors. MSI-X is not affected by this.
649      * The hint is used for the first IRQ, the rest should
650      * be allocated continuously.
651      */
652     if (align) {
653         assert((num == 1) || (num == 2) || (num == 4) ||
654                (num == 8) || (num == 16) || (num == 32));
655         first = ics_find_free_block(ics, num, num);
656     } else {
657         first = ics_find_free_block(ics, num, 1);
658     }
659 
660     if (first < 0) {
661         error_setg(errp, "can't find a free %d-IRQ block", num);
662         return -1;
663     }
664 
665     return first + ics->offset;
666 }
667 
668 #define SPAPR_IRQ_XICS_LEGACY_NR_XIRQS     0x400
669 
670 SpaprIrq spapr_irq_xics_legacy = {
671     .nr_xirqs    = SPAPR_IRQ_XICS_LEGACY_NR_XIRQS,
672     .nr_msis     = SPAPR_IRQ_XICS_LEGACY_NR_XIRQS,
673     .xics        = true,
674     .xive        = false,
675 
676     .post_load   = spapr_irq_post_load_xics,
677     .reset       = spapr_irq_reset_xics,
678     .init_kvm    = spapr_irq_init_kvm_xics,
679 };
680 
681 static void spapr_irq_register_types(void)
682 {
683     type_register_static(&spapr_intc_info);
684 }
685 
686 type_init(spapr_irq_register_types)
687