#include "kvm/devices.h"
#include "kvm/fdt.h"
#include "kvm/ioeventfd.h"
#include "kvm/ioport.h"
#include "kvm/kvm.h"
#include "kvm/kvm-cpu.h"
#include "kvm/irq.h"
#include "kvm/mutex.h"

#include <linux/byteorder.h>
#include <linux/kernel.h>
#include <linux/kvm.h>
#include <linux/sizes.h>

/*
 * From the RISC-V Privileged Spec v1.10:
 *
 * Global interrupt sources are assigned small unsigned integer identifiers,
 * beginning at the value 1. An interrupt ID of 0 is reserved to mean no
 * interrupt. Interrupt identifiers are also used to break ties when two or
 * more interrupt sources have the same assigned priority. Smaller values of
 * interrupt ID take precedence over larger values of interrupt ID.
 *
 * While the RISC-V supervisor spec doesn't define the maximum number of
 * devices supported by the PLIC, the largest number supported by devices
 * marked as 'riscv,plic0' (which is the only device type this driver
 * supports, and is the only extant PLIC as of now) is 1024. As mentioned
 * above, device 0 is defined to be non-existent, so this device really only
 * supports 1023 devices.
 */

#define MAX_DEVICES 1024
#define MAX_CONTEXTS 15872
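/* MAX_CONTEXTS is the architectural limit implied by the register map below. */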

/*
 * The PLIC consists of memory-mapped control registers, with a memory map as
 * follows:
 *
 * base + 0x000000: Reserved (interrupt source 0 does not exist)
 * base + 0x000004: Interrupt source 1 priority
 * base + 0x000008: Interrupt source 2 priority
 * ...
 * base + 0x000FFC: Interrupt source 1023 priority
 * base + 0x001000: Pending bits for sources 0-31
 * ...
 * base + 0x00107C: Pending bits for sources 992-1023
 * base + 0x002000: Enable bits for sources 0-31 on context 0
 * base + 0x002004: Enable bits for sources 32-63 on context 0
 * ...
 * base + 0x00207C: Enable bits for sources 992-1023 on context 0
 * base + 0x002080: Enable bits for sources 0-31 on context 1
 * ...
 * base + 0x002100: Enable bits for sources 0-31 on context 2
 * ...
 * base + 0x1F1FFC: Enable bits for sources 992-1023 on context 15871
 * base + 0x1F2000: Reserved
 * ...              (higher context IDs would fit here, but wouldn't fit
 *                   inside the per-context priority vector)
 * base + 0x1FFFFC: Reserved
 * base + 0x200000: Priority threshold for context 0
 * base + 0x200004: Claim/complete for context 0
 * base + 0x200008: Reserved
 * ...
 * base + 0x200FFC: Reserved
 * base + 0x201000: Priority threshold for context 1
 * base + 0x201004: Claim/complete for context 1
 * ...
 * base + 0x3FFF000: Priority threshold for context 15871
 * base + 0x3FFF004: Claim/complete for context 15871
 * base + 0x3FFF008: Reserved
 * ...
 * base + 0x3FFFFFC: Reserved
 */

/* Each interrupt source has a priority register associated with it. */
#define PRIORITY_BASE 0
#define PRIORITY_PER_ID 4

/*
 * Each hart context has a vector of interrupt enable bits associated with
 * it. There's one bit for each interrupt source.
 */
#define ENABLE_BASE 0x2000
#define ENABLE_PER_HART 0x80
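/*
 * For example, the enable word covering source 42 on context 3 sits at
 * ENABLE_BASE + 3 * ENABLE_PER_HART + (42 / 32) * 4 = 0x2184.
 */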

/*
 * Each hart context has a set of control registers associated with it.
 * Right now there are only two: a source priority threshold over which the
 * hart will take an interrupt, and a register to claim interrupts.
 */
#define CONTEXT_BASE 0x200000
#define CONTEXT_PER_HART 0x1000
#define CONTEXT_THRESHOLD 0
#define CONTEXT_CLAIM 4
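/*
 * For example, the claim/complete register of context 2 sits at
 * CONTEXT_BASE + 2 * CONTEXT_PER_HART + CONTEXT_CLAIM = 0x202004.
 */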

#define REG_SIZE 0x1000000

#define IRQCHIP_PLIC_NR 0
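/* Index of this (the only) emulated irqchip in KVM_IRQ_ROUTING_IRQCHIP entries. */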

struct plic_state;

struct plic_context {
	/* State to which this belongs */
	struct plic_state *s;

	/* Static Configuration */
	u32 num;
	struct kvm_cpu *vcpu;

	/* Local IRQ state */
	struct mutex irq_lock;
	u8 irq_priority_threshold;
	u32 irq_enable[MAX_DEVICES/32];
	u32 irq_pending[MAX_DEVICES/32];
	u8 irq_pending_priority[MAX_DEVICES];
	u32 irq_claimed[MAX_DEVICES/32];
	u32 irq_autoclear[MAX_DEVICES/32];
};

struct plic_state {
	bool ready;
	struct kvm *kvm;

	/* Static Configuration */
	u32 num_irq;
	u32 num_irq_word;
	u32 max_prio;

	/* Context Array */
	u32 num_context;
	struct plic_context *contexts;

	/* Global IRQ state */
	struct mutex irq_lock;
	u8 irq_priority[MAX_DEVICES];
	u32 irq_level[MAX_DEVICES/32];
};

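/*
 * The single global PLIC instance. plic__init() allocates two contexts
 * per vcpu, with context i serving vcpu i / 2.
 */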
static struct plic_state plic;

/* Note: Must be called with c->irq_lock held */
static u32 __plic_context_best_pending_irq(struct plic_state *s,
					   struct plic_context *c)
{
	u8 best_irq_prio = 0;
	u32 i, j, irq, best_irq = 0;

	for (i = 0; i < s->num_irq_word; i++) {
		if (!c->irq_pending[i])
			continue;

		for (j = 0; j < 32; j++) {
			irq = i * 32 + j;
			if ((s->num_irq <= irq) ||
			    !(c->irq_pending[i] & (1 << j)) ||
			    (c->irq_claimed[i] & (1 << j)))
				continue;

			if (!best_irq ||
			    (best_irq_prio < c->irq_pending_priority[irq])) {
				best_irq = irq;
				best_irq_prio = c->irq_pending_priority[irq];
			}
		}
	}

	return best_irq;
}

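/* Assert or deassert the vcpu's external interrupt to match pending state. */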
/* Note: Must be called with c->irq_lock held */
static void __plic_context_irq_update(struct plic_state *s,
				      struct plic_context *c)
{
	u32 best_irq = __plic_context_best_pending_irq(s, c);
	u32 virq = (best_irq) ? KVM_INTERRUPT_SET : KVM_INTERRUPT_UNSET;

	if (ioctl(c->vcpu->vcpu_fd, KVM_INTERRUPT, &virq) < 0)
		pr_warning("KVM_INTERRUPT failed");
}

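/*
 * Claim the best pending interrupt: deassert the line, auto-clear
 * edge-triggered sources, mark level-triggered sources as claimed, then
 * re-evaluate the line for any remaining pending interrupts.
 */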
/* Note: Must be called with c->irq_lock held */
static u32 __plic_context_irq_claim(struct plic_state *s,
				    struct plic_context *c)
{
	u32 virq = KVM_INTERRUPT_UNSET;
	u32 best_irq = __plic_context_best_pending_irq(s, c);
	u32 best_irq_word = best_irq / 32;
	u32 best_irq_mask = (1 << (best_irq % 32));

	if (ioctl(c->vcpu->vcpu_fd, KVM_INTERRUPT, &virq) < 0)
		pr_warning("KVM_INTERRUPT failed");

	if (best_irq) {
		if (c->irq_autoclear[best_irq_word] & best_irq_mask) {
			c->irq_pending[best_irq_word] &= ~best_irq_mask;
			c->irq_pending_priority[best_irq] = 0;
			c->irq_claimed[best_irq_word] &= ~best_irq_mask;
			c->irq_autoclear[best_irq_word] &= ~best_irq_mask;
		} else
			c->irq_claimed[best_irq_word] |= best_irq_mask;
	}

	__plic_context_irq_update(s, c);

	return best_irq;
}

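/*
 * Raise or lower interrupt line "irq" and propagate it to the first
 * context that has the source enabled. Edge-triggered interrupts are
 * emulated by auto-clearing the pending bit when the guest reads the
 * CLAIM register.
 */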
static void plic__irq_trig(struct kvm *kvm, int irq, int level, bool edge)
{
	bool irq_marked = false;
	u8 irq_prio, irq_word;
	u32 i, irq_mask;
	struct plic_context *c = NULL;
	struct plic_state *s = &plic;

	if (!s->ready)
		return;

	mutex_lock(&s->irq_lock);

	if (irq <= 0 || s->num_irq <= (u32)irq)
		goto done;

	irq_prio = s->irq_priority[irq];
	irq_word = irq / 32;
	irq_mask = 1 << (irq % 32);

	if (level)
		s->irq_level[irq_word] |= irq_mask;
	else
		s->irq_level[irq_word] &= ~irq_mask;

	/*
	 * Note: PLIC interrupts are level-triggered. As of now,
	 * there is no notion of edge-triggered interrupts. To
	 * handle this we auto-clear edge-triggered interrupts
	 * when PLIC context CLAIM register is read.
	 */
	for (i = 0; i < s->num_context; i++) {
		c = &s->contexts[i];

		mutex_lock(&c->irq_lock);
		if (c->irq_enable[irq_word] & irq_mask) {
			if (level) {
				c->irq_pending[irq_word] |= irq_mask;
				c->irq_pending_priority[irq] = irq_prio;
				if (edge)
					c->irq_autoclear[irq_word] |= irq_mask;
			} else {
				c->irq_pending[irq_word] &= ~irq_mask;
				c->irq_pending_priority[irq] = 0;
				c->irq_claimed[irq_word] &= ~irq_mask;
				c->irq_autoclear[irq_word] &= ~irq_mask;
			}
			__plic_context_irq_update(s, c);
			irq_marked = true;
		}
		mutex_unlock(&c->irq_lock);

		if (irq_marked)
			break;
	}

done:
	mutex_unlock(&s->irq_lock);
}

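/* Read a source priority register (offset is relative to PRIORITY_BASE). */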
static void plic__priority_read(struct plic_state *s,
				u64 offset, void *data)
{
	u32 irq = (offset >> 2);

	if (irq == 0 || irq >= s->num_irq)
		return;

	mutex_lock(&s->irq_lock);
	ioport__write32(data, s->irq_priority[irq]);
	mutex_unlock(&s->irq_lock);
}

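/*
 * Write a source priority register. Only priorities 0..max_prio (15) are
 * implemented, so the written value is truncated to its low four bits.
 */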
static void plic__priority_write(struct plic_state *s,
				 u64 offset, void *data)
{
	u32 val, irq = (offset >> 2);

	if (irq == 0 || irq >= s->num_irq)
		return;

	mutex_lock(&s->irq_lock);
	val = ioport__read32(data);
	val &= ((1 << PRIORITY_PER_ID) - 1);
	s->irq_priority[irq] = val;
	mutex_unlock(&s->irq_lock);
}

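/* Read one 32-bit word of a context's interrupt-enable bitmap. */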
static void plic__context_enable_read(struct plic_state *s,
				      struct plic_context *c,
				      u64 offset, void *data)
{
	u32 irq_word = offset >> 2;

	if (irq_word >= s->num_irq_word)
		return;

	mutex_lock(&c->irq_lock);
	ioport__write32(data, c->irq_enable[irq_word]);
	mutex_unlock(&c->irq_lock);
}

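/*
 * Write one word of a context's enable bitmap. Source 0 can never be
 * enabled. Newly enabled sources whose line is already high become
 * pending; newly disabled sources have their pending and claimed state
 * cleared.
 */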
static void plic__context_enable_write(struct plic_state *s,
				       struct plic_context *c,
				       u64 offset, void *data)
{
	u8 irq_prio;
	u32 i, irq, irq_mask;
	u32 irq_word = offset >> 2;
	u32 old_val, new_val, xor_val;

	if (irq_word >= s->num_irq_word)
		return;

	mutex_lock(&s->irq_lock);

	mutex_lock(&c->irq_lock);

	old_val = c->irq_enable[irq_word];
	new_val = ioport__read32(data);

	if (irq_word == 0)
		new_val &= ~0x1;

	c->irq_enable[irq_word] = new_val;

	xor_val = old_val ^ new_val;
	for (i = 0; i < 32; i++) {
		irq = irq_word * 32 + i;
		irq_mask = 1 << i;
		irq_prio = s->irq_priority[irq];
		if (!(xor_val & irq_mask))
			continue;
		if ((new_val & irq_mask) &&
		    (s->irq_level[irq_word] & irq_mask)) {
			c->irq_pending[irq_word] |= irq_mask;
			c->irq_pending_priority[irq] = irq_prio;
		} else if (!(new_val & irq_mask)) {
			c->irq_pending[irq_word] &= ~irq_mask;
			c->irq_pending_priority[irq] = 0;
			c->irq_claimed[irq_word] &= ~irq_mask;
		}
	}

	__plic_context_irq_update(s, c);

	mutex_unlock(&c->irq_lock);

	mutex_unlock(&s->irq_lock);
}

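/* Handle reads of a context's priority threshold and claim registers. */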
static void plic__context_read(struct plic_state *s,
			       struct plic_context *c,
			       u64 offset, void *data)
{
	mutex_lock(&c->irq_lock);

	switch (offset) {
	case CONTEXT_THRESHOLD:
		ioport__write32(data, c->irq_priority_threshold);
		break;
	case CONTEXT_CLAIM:
		ioport__write32(data, __plic_context_irq_claim(s, c));
		break;
	default:
		break;
	}

	mutex_unlock(&c->irq_lock);
}

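/*
 * Handle writes of a context's threshold and claim/complete registers.
 * Writing a claimed source ID to CONTEXT_CLAIM completes the interrupt.
 */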
static void plic__context_write(struct plic_state *s,
				struct plic_context *c,
				u64 offset, void *data)
{
	u32 val, irq_word, irq_mask;
	bool irq_update = false;

	mutex_lock(&c->irq_lock);

	switch (offset) {
	case CONTEXT_THRESHOLD:
		val = ioport__read32(data);
		val &= ((1 << PRIORITY_PER_ID) - 1);
		if (val <= s->max_prio)
			c->irq_priority_threshold = val;
		else
			irq_update = true;
		break;
	case CONTEXT_CLAIM:
		val = ioport__read32(data);
		irq_word = val / 32;
		irq_mask = 1 << (val % 32);
		if ((val < plic.num_irq) &&
		    (c->irq_enable[irq_word] & irq_mask)) {
			c->irq_claimed[irq_word] &= ~irq_mask;
			irq_update = true;
		}
		break;
	default:
		irq_update = true;
		break;
	}

	if (irq_update)
		__plic_context_irq_update(s, c);

	mutex_unlock(&c->irq_lock);
}

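/*
 * Dispatch 32-bit MMIO accesses within the PLIC region to the priority,
 * enable, or context register blocks.
 */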
static void plic__mmio_callback(struct kvm_cpu *vcpu,
				u64 addr, u8 *data, u32 len,
				u8 is_write, void *ptr)
{
	u32 cntx;
	struct plic_state *s = ptr;

	if (len != 4)
		die("plic: invalid len=%d", len);

	addr &= ~0x3;
	addr -= RISCV_IRQCHIP;

	if (is_write) {
		if (PRIORITY_BASE <= addr && addr < ENABLE_BASE) {
			plic__priority_write(s, addr, data);
		} else if (ENABLE_BASE <= addr && addr < CONTEXT_BASE) {
			cntx = (addr - ENABLE_BASE) / ENABLE_PER_HART;
			addr -= cntx * ENABLE_PER_HART + ENABLE_BASE;
			if (cntx < s->num_context)
				plic__context_enable_write(s,
							   &s->contexts[cntx],
							   addr, data);
		} else if (CONTEXT_BASE <= addr && addr < REG_SIZE) {
			cntx = (addr - CONTEXT_BASE) / CONTEXT_PER_HART;
			addr -= cntx * CONTEXT_PER_HART + CONTEXT_BASE;
			if (cntx < s->num_context)
				plic__context_write(s, &s->contexts[cntx],
						    addr, data);
		}
	} else {
		if (PRIORITY_BASE <= addr && addr < ENABLE_BASE) {
			plic__priority_read(s, addr, data);
		} else if (ENABLE_BASE <= addr && addr < CONTEXT_BASE) {
			cntx = (addr - ENABLE_BASE) / ENABLE_PER_HART;
			addr -= cntx * ENABLE_PER_HART + ENABLE_BASE;
			if (cntx < s->num_context)
				plic__context_enable_read(s,
							  &s->contexts[cntx],
							  addr, data);
		} else if (CONTEXT_BASE <= addr && addr < REG_SIZE) {
			cntx = (addr - CONTEXT_BASE) / CONTEXT_PER_HART;
			addr -= cntx * CONTEXT_PER_HART + CONTEXT_BASE;
			if (cntx < s->num_context)
				plic__context_read(s, &s->contexts[cntx],
						   addr, data);
		}
	}
}

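/* Emit the PLIC node of the guest device tree. */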
static void plic__generate_fdt_node(void *fdt, struct kvm *kvm)
{
	u32 i;
	char name[64];
	u32 reg_cells[4], *irq_cells;

	reg_cells[0] = 0;
	reg_cells[1] = cpu_to_fdt32(RISCV_IRQCHIP);
	reg_cells[2] = 0;
	reg_cells[3] = cpu_to_fdt32(RISCV_IRQCHIP_SIZE);

	irq_cells = calloc(plic.num_context * 2, sizeof(u32));
	if (!irq_cells)
		die("Failed to alloc irq_cells");

	sprintf(name, "interrupt-controller@%08x", (u32)RISCV_IRQCHIP);
	_FDT(fdt_begin_node(fdt, name));
	_FDT(fdt_property_string(fdt, "compatible", "riscv,plic0"));
	_FDT(fdt_property(fdt, "reg", reg_cells, sizeof(reg_cells)));
	_FDT(fdt_property_cell(fdt, "#interrupt-cells", 1));
	_FDT(fdt_property(fdt, "interrupt-controller", NULL, 0));
	_FDT(fdt_property_cell(fdt, "riscv,max-priority", plic.max_prio));
	_FDT(fdt_property_cell(fdt, "riscv,ndev", MAX_DEVICES - 1));
	_FDT(fdt_property_cell(fdt, "phandle", PHANDLE_PLIC));
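	/*
	 * Each hart has two PLIC contexts, so emit two interrupt specifiers
	 * per hart: -1 (0xffffffff) leaves the M-mode external interrupt
	 * unconnected, and 9 is the S-mode external interrupt.
	 */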
	for (i = 0; i < (plic.num_context / 2); i++) {
		irq_cells[4*i + 0] = cpu_to_fdt32(PHANDLE_CPU_INTC_BASE + i);
		irq_cells[4*i + 1] = cpu_to_fdt32(0xffffffff);
		irq_cells[4*i + 2] = cpu_to_fdt32(PHANDLE_CPU_INTC_BASE + i);
		irq_cells[4*i + 3] = cpu_to_fdt32(9);
	}
	_FDT(fdt_property(fdt, "interrupts-extended", irq_cells,
			  sizeof(u32) * plic.num_context * 2));
	_FDT(fdt_end_node(fdt));

	free(irq_cells);
}

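/* Pre-populate an identity GSI-to-pin routing table for the PLIC. */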
static int plic__irq_routing_init(struct kvm *kvm)
{
	int r;

	/*
	 * This describes the default routing that the kernel uses without
	 * any routing explicitly set up via KVM_SET_GSI_ROUTING. So we
	 * don't need to commit these settings right now. The first actual
	 * user (MSI routing) will engage these mappings then.
	 */
	for (next_gsi = 0; next_gsi < MAX_DEVICES; next_gsi++) {
		r = irq__allocate_routing_entry();
		if (r)
			return r;

		irq_routing->entries[irq_routing->nr++] =
			(struct kvm_irq_routing_entry) {
				.gsi = next_gsi,
				.type = KVM_IRQ_ROUTING_IRQCHIP,
				.u.irqchip.irqchip = IRQCHIP_PLIC_NR,
				.u.irqchip.pin = next_gsi,
			};
	}

	return 0;
}

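/* Allocate per-vcpu contexts, register the MMIO region, and set up routing. */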
static int plic__init(struct kvm *kvm)
{
	u32 i;
	int ret;
	struct plic_context *c;

	if (riscv_irqchip != IRQCHIP_PLIC)
		return 0;

	plic.kvm = kvm;
	plic.num_irq = MAX_DEVICES;
	plic.num_irq_word = plic.num_irq / 32;
	if ((plic.num_irq_word * 32) < plic.num_irq)
		plic.num_irq_word++;
	plic.max_prio = (1UL << PRIORITY_PER_ID) - 1;

	plic.num_context = kvm->nrcpus * 2;
	plic.contexts = calloc(plic.num_context, sizeof(struct plic_context));
	if (!plic.contexts)
		return -ENOMEM;
	for (i = 0; i < plic.num_context; i++) {
		c = &plic.contexts[i];
		c->s = &plic;
		c->num = i;
		c->vcpu = kvm->cpus[i / 2];
		mutex_init(&c->irq_lock);
	}

	mutex_init(&plic.irq_lock);

	ret = kvm__register_mmio(kvm, RISCV_IRQCHIP, RISCV_IRQCHIP_SIZE,
				 false, plic__mmio_callback, &plic);
	if (ret)
		return ret;

	/* Setup default IRQ routing */
	plic__irq_routing_init(kvm);

	plic.ready = true;

	return 0;
}
dev_init(plic__init);

static int plic__exit(struct kvm *kvm)
{
	if (riscv_irqchip != IRQCHIP_PLIC)
		return 0;

	plic.ready = false;
	kvm__deregister_mmio(kvm, RISCV_IRQCHIP);
	free(plic.contexts);

	return 0;
}
dev_exit(plic__exit);

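/* Select the PLIC as the platform irqchip if none has been chosen yet. */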
void plic__create(struct kvm *kvm)
{
	if (riscv_irqchip != IRQCHIP_UNKNOWN)
		return;

	riscv_irqchip = IRQCHIP_PLIC;
	riscv_irqchip_inkernel = false;
	riscv_irqchip_trigger = plic__irq_trig;
	riscv_irqchip_generate_fdt_node = plic__generate_fdt_node;
	riscv_irqchip_phandle = PHANDLE_PLIC;
	riscv_irqchip_msi_phandle = PHANDLE_RESERVED;
	riscv_irqchip_line_sensing = false;
}