1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2011 NetApp, Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29 #include <sys/param.h>
30 #include <sys/linker_set.h>
31 #include <sys/mman.h>
32
33 #include <ctype.h>
34 #include <err.h>
35 #include <errno.h>
36 #include <pthread.h>
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <strings.h>
41 #include <assert.h>
42 #include <stdbool.h>
43 #include <sysexits.h>
44
45 #include <dev/vmm/vmm_mem.h>
46 #include <machine/vmm.h>
47 #include <machine/vmm_snapshot.h>
48 #include <vmmapi.h>
49
50 #include "acpi.h"
51 #include "bhyverun.h"
52 #include "bootrom.h"
53 #include "config.h"
54 #include "debug.h"
55 #ifdef __amd64__
56 #include "amd64/inout.h"
57 #endif
58 #include "mem.h"
59 #include "pci_emul.h"
60 #ifdef __amd64__
61 #include "amd64/pci_lpc.h"
62 #include "pci_passthru.h"
63 #endif
64 #include "qemu_fwcfg.h"
65
66 #define CONF1_ADDR_PORT 0x0cf8
67 #define CONF1_DATA_PORT 0x0cfc
68
69 #define CONF1_ENABLE 0x80000000ul
70
71 #define MAXBUSES (PCI_BUSMAX + 1)
72 #define MAXSLOTS (PCI_SLOTMAX + 1)
73 #define MAXFUNCS (PCI_FUNCMAX + 1)
74
75 #define GB (1024 * 1024 * 1024UL)
76
77 struct funcinfo {
78 nvlist_t *fi_config;
79 struct pci_devemu *fi_pde;
80 struct pci_devinst *fi_devi;
81 };
82
83 struct intxinfo {
84 int ii_count;
85 struct pci_irq ii_irq;
86 };
87
88 struct slotinfo {
89 struct intxinfo si_intpins[4];
90 struct funcinfo si_funcs[MAXFUNCS];
91 };
92
93 struct businfo {
94 uint16_t iobase, iolimit; /* I/O window */
95 uint32_t membase32, memlimit32; /* mmio window below 4GB */
96 uint64_t membase64, memlimit64; /* mmio window above 4GB */
97 struct slotinfo slotinfo[MAXSLOTS];
98 };
99
100 static struct businfo *pci_businfo[MAXBUSES];
101
102 SET_DECLARE(pci_devemu_set, struct pci_devemu);
103
104 static uint64_t pci_emul_iobase;
105 static uint8_t *pci_emul_rombase;
106 static uint64_t pci_emul_romoffset;
107 static uint8_t *pci_emul_romlim;
108 static uint64_t pci_emul_membase32;
109 static uint64_t pci_emul_membase64;
110 static uint64_t pci_emul_memlim64;
111
112 struct pci_bar_allocation {
113 TAILQ_ENTRY(pci_bar_allocation) chain;
114 struct pci_devinst *pdi;
115 int idx;
116 enum pcibar_type type;
117 uint64_t size;
118 };
119
120 static TAILQ_HEAD(pci_bar_list, pci_bar_allocation) pci_bars =
121 TAILQ_HEAD_INITIALIZER(pci_bars);
122
123 struct boot_device {
124 TAILQ_ENTRY(boot_device) boot_device_chain;
125 struct pci_devinst *pdi;
126 int bootindex;
127 };
128 static TAILQ_HEAD(boot_list, boot_device) boot_devices = TAILQ_HEAD_INITIALIZER(
129 boot_devices);
130
131 #if defined(__amd64__)
132 #define PCI_EMUL_IOBASE 0x2000
133 #define PCI_EMUL_IOLIMIT 0x10000
134 #define PCI_EMUL_IOMASK 0xffff
135 /*
136 * OVMF always uses 0xc0000000 as base address for 32 bit PCI MMIO. Don't
137 * change this address without changing it in OVMF.
138 */
139 #define PCI_EMUL_MEMBASE32 0xc0000000
140 #elif defined(__aarch64__) || defined(__riscv)
141 #define PCI_EMUL_IOBASE 0xdf000000UL
142 #define PCI_EMUL_IOLIMIT 0xe0000000UL
143 #define PCI_EMUL_MEMBASE32 0xa0000000UL
144 #else
145 #error Unsupported platform
146 #endif
147
148 #define PCI_EMUL_ROMSIZE 0x10000000
149
150 #define PCI_EMUL_ECFG_BASE 0xE0000000 /* 3.5GB */
151 #define PCI_EMUL_ECFG_SIZE (MAXBUSES * 1024 * 1024) /* 1MB per bus */
152 #ifdef __amd64__
153 SYSRES_MEM(PCI_EMUL_ECFG_BASE, PCI_EMUL_ECFG_SIZE);
154 #endif
155
156 #define PCI_EMUL_MEMLIMIT32 PCI_EMUL_ECFG_BASE
157 #define PCI_EMUL_MEMSIZE64 (32*GB)
158
159 static void pci_lintr_route(struct pci_devinst *pi);
160 static void pci_lintr_update(struct pci_devinst *pi);
161
162 static struct pci_devemu *pci_emul_finddev(const char *name);
163 static void pci_cfgrw(int in, int bus, int slot, int func, int coff,
164 int bytes, uint32_t *val);
165
166 static __inline void
CFGWRITE(struct pci_devinst * pi,int coff,uint32_t val,int bytes)167 CFGWRITE(struct pci_devinst *pi, int coff, uint32_t val, int bytes)
168 {
169
170 if (bytes == 1)
171 pci_set_cfgdata8(pi, coff, val);
172 else if (bytes == 2)
173 pci_set_cfgdata16(pi, coff, val);
174 else
175 pci_set_cfgdata32(pi, coff, val);
176 }
177
178 static __inline uint32_t
CFGREAD(struct pci_devinst * pi,int coff,int bytes)179 CFGREAD(struct pci_devinst *pi, int coff, int bytes)
180 {
181
182 if (bytes == 1)
183 return (pci_get_cfgdata8(pi, coff));
184 else if (bytes == 2)
185 return (pci_get_cfgdata16(pi, coff));
186 else
187 return (pci_get_cfgdata32(pi, coff));
188 }
189
190 static int
is_pcir_bar(int coff)191 is_pcir_bar(int coff)
192 {
193 return (coff >= PCIR_BAR(0) && coff < PCIR_BAR(PCI_BARMAX + 1));
194 }
195
196 static int
is_pcir_bios(int coff)197 is_pcir_bios(int coff)
198 {
199 return (coff >= PCIR_BIOS && coff < PCIR_BIOS + 4);
200 }
201
202 /*
203 * I/O access
204 */
205
206 /*
207 * Slot options are in the form:
208 *
209 * <bus>:<slot>:<func>,<emul>[,<config>]
210 * <slot>[:<func>],<emul>[,<config>]
211 *
212 * slot is 0..31
213 * func is 0..7
214 * emul is a string describing the type of PCI device e.g. virtio-net
215 * config is an optional string, depending on the device, that can be
216 * used for configuration.
217 * Examples are:
218 * 1,virtio-net,tap0
219 * 3:0,dummy
220 */
/* Report a malformed PCI slot option string to the user. */
static void
pci_parse_slot_usage(char *aopt)
{
	EPRINTLN("Invalid PCI slot info field \"%s\"", aopt);
}
227
228 /*
229 * Helper function to parse a list of comma-separated options where
230 * each option is formatted as "name[=value]". If no value is
231 * provided, the option is treated as a boolean and is given a value
232 * of true.
233 */
234 int
pci_parse_legacy_config(nvlist_t * nvl,const char * opt)235 pci_parse_legacy_config(nvlist_t *nvl, const char *opt)
236 {
237 char *config, *name, *tofree, *value;
238
239 if (opt == NULL)
240 return (0);
241
242 config = tofree = strdup(opt);
243 while ((name = strsep(&config, ",")) != NULL) {
244 value = strchr(name, '=');
245 if (value != NULL) {
246 *value = '\0';
247 value++;
248 set_config_value_node(nvl, name, value);
249 } else
250 set_config_bool_node(nvl, name, true);
251 }
252 free(tofree);
253 return (0);
254 }
255
256 /*
257 * PCI device configuration is stored in MIBs that encode the device's
258 * location:
259 *
260 * pci.<bus>.<slot>.<func>
261 *
262 * Where "bus", "slot", and "func" are all decimal values without
263 * leading zeroes. Each valid device must have a "device" node which
264 * identifies the driver model of the device.
265 *
266 * Device backends can provide a parser for the "config" string. If
267 * a custom parser is not provided, pci_parse_legacy_config() is used
268 * to parse the string.
269 */
270 int
pci_parse_slot(char * opt)271 pci_parse_slot(char *opt)
272 {
273 char node_name[sizeof("pci.XXX.XX.X")];
274 struct pci_devemu *pde;
275 char *emul, *config, *str, *cp;
276 int error, bnum, snum, fnum;
277 nvlist_t *nvl;
278
279 error = -1;
280 str = strdup(opt);
281
282 emul = config = NULL;
283 if ((cp = strchr(str, ',')) != NULL) {
284 *cp = '\0';
285 emul = cp + 1;
286 if ((cp = strchr(emul, ',')) != NULL) {
287 *cp = '\0';
288 config = cp + 1;
289 }
290 } else {
291 pci_parse_slot_usage(opt);
292 goto done;
293 }
294
295 /* <bus>:<slot>:<func> */
296 if (sscanf(str, "%d:%d:%d", &bnum, &snum, &fnum) != 3) {
297 bnum = 0;
298 /* <slot>:<func> */
299 if (sscanf(str, "%d:%d", &snum, &fnum) != 2) {
300 fnum = 0;
301 /* <slot> */
302 if (sscanf(str, "%d", &snum) != 1) {
303 snum = -1;
304 }
305 }
306 }
307
308 if (bnum < 0 || bnum >= MAXBUSES || snum < 0 || snum >= MAXSLOTS ||
309 fnum < 0 || fnum >= MAXFUNCS) {
310 pci_parse_slot_usage(opt);
311 goto done;
312 }
313
314 pde = pci_emul_finddev(emul);
315 if (pde == NULL) {
316 EPRINTLN("pci slot %d:%d:%d: unknown device \"%s\"", bnum, snum,
317 fnum, emul);
318 goto done;
319 }
320
321 snprintf(node_name, sizeof(node_name), "pci.%d.%d.%d", bnum, snum,
322 fnum);
323 nvl = find_config_node(node_name);
324 if (nvl != NULL) {
325 EPRINTLN("pci slot %d:%d:%d already occupied!", bnum, snum,
326 fnum);
327 goto done;
328 }
329 nvl = create_config_node(node_name);
330 if (pde->pe_alias != NULL)
331 set_config_value_node(nvl, "device", pde->pe_alias);
332 else
333 set_config_value_node(nvl, "device", pde->pe_emu);
334
335 if (pde->pe_legacy_config != NULL)
336 error = pde->pe_legacy_config(nvl, config);
337 else
338 error = pci_parse_legacy_config(nvl, config);
339 done:
340 free(str);
341 return (error);
342 }
343
344 void
pci_print_supported_devices(void)345 pci_print_supported_devices(void)
346 {
347 struct pci_devemu **pdpp, *pdp;
348
349 SET_FOREACH(pdpp, pci_devemu_set) {
350 pdp = *pdpp;
351 printf("%s\n", pdp->pe_emu);
352 }
353 }
354
355 uint32_t
pci_config_read_reg(const struct pci_conf * host_conf,nvlist_t * nvl,const uint32_t reg,const uint8_t size,const uint32_t def)356 pci_config_read_reg(const struct pci_conf *host_conf, nvlist_t *nvl,
357 const uint32_t reg, const uint8_t size, const uint32_t def)
358 {
359 const char *config;
360 const nvlist_t *pci_regs;
361 uint32_t host;
362
363 assert(size == 1 || size == 2 || size == 4);
364
365 pci_regs = find_relative_config_node(nvl, "pcireg");
366 if (pci_regs == NULL) {
367 return (def);
368 }
369
370 switch (reg) {
371 case PCIR_DEVICE:
372 config = get_config_value_node(pci_regs, "device");
373 host = host_conf != NULL ? host_conf->pc_device : 0;
374 break;
375 case PCIR_VENDOR:
376 config = get_config_value_node(pci_regs, "vendor");
377 host = host_conf != NULL ? host_conf->pc_vendor : 0;
378 break;
379 case PCIR_REVID:
380 config = get_config_value_node(pci_regs, "revid");
381 host = host_conf != NULL ? host_conf->pc_revid : 0;
382 break;
383 case PCIR_SUBVEND_0:
384 config = get_config_value_node(pci_regs, "subvendor");
385 host = host_conf != NULL ? host_conf->pc_subvendor : 0;
386 break;
387 case PCIR_SUBDEV_0:
388 config = get_config_value_node(pci_regs, "subdevice");
389 host = host_conf != NULL ? host_conf->pc_subdevice : 0;
390 break;
391 default:
392 return (-1);
393 }
394
395 if (config == NULL) {
396 return (def);
397 } else if (host_conf != NULL && strcmp(config, "host") == 0) {
398 return (host);
399 } else {
400 return (strtol(config, NULL, 16));
401 }
402 }
403
404 static int
pci_valid_pba_offset(struct pci_devinst * pi,uint64_t offset)405 pci_valid_pba_offset(struct pci_devinst *pi, uint64_t offset)
406 {
407
408 if (offset < pi->pi_msix.pba_offset)
409 return (0);
410
411 if (offset >= pi->pi_msix.pba_offset + pi->pi_msix.pba_size) {
412 return (0);
413 }
414
415 return (1);
416 }
417
418 int
pci_emul_msix_twrite(struct pci_devinst * pi,uint64_t offset,int size,uint64_t value)419 pci_emul_msix_twrite(struct pci_devinst *pi, uint64_t offset, int size,
420 uint64_t value)
421 {
422 int msix_entry_offset;
423 int tab_index;
424 char *dest;
425
426 /* support only 4 or 8 byte writes */
427 if (size != 4 && size != 8)
428 return (-1);
429
430 /*
431 * Return if table index is beyond what device supports
432 */
433 tab_index = offset / MSIX_TABLE_ENTRY_SIZE;
434 if (tab_index >= pi->pi_msix.table_count)
435 return (-1);
436
437 msix_entry_offset = offset % MSIX_TABLE_ENTRY_SIZE;
438
439 /* support only aligned writes */
440 if ((msix_entry_offset % size) != 0)
441 return (-1);
442
443 dest = (char *)(pi->pi_msix.table + tab_index);
444 dest += msix_entry_offset;
445
446 if (size == 4)
447 *((uint32_t *)dest) = value;
448 else
449 *((uint64_t *)dest) = value;
450
451 return (0);
452 }
453
454 uint64_t
pci_emul_msix_tread(struct pci_devinst * pi,uint64_t offset,int size)455 pci_emul_msix_tread(struct pci_devinst *pi, uint64_t offset, int size)
456 {
457 char *dest;
458 int msix_entry_offset;
459 int tab_index;
460 uint64_t retval = ~0;
461
462 /*
463 * The PCI standard only allows 4 and 8 byte accesses to the MSI-X
464 * table but we also allow 1 byte access to accommodate reads from
465 * ddb.
466 */
467 if (size != 1 && size != 4 && size != 8)
468 return (retval);
469
470 msix_entry_offset = offset % MSIX_TABLE_ENTRY_SIZE;
471
472 /* support only aligned reads */
473 if ((msix_entry_offset % size) != 0) {
474 return (retval);
475 }
476
477 tab_index = offset / MSIX_TABLE_ENTRY_SIZE;
478
479 if (tab_index < pi->pi_msix.table_count) {
480 /* valid MSI-X Table access */
481 dest = (char *)(pi->pi_msix.table + tab_index);
482 dest += msix_entry_offset;
483
484 if (size == 1)
485 retval = *((uint8_t *)dest);
486 else if (size == 4)
487 retval = *((uint32_t *)dest);
488 else
489 retval = *((uint64_t *)dest);
490 } else if (pci_valid_pba_offset(pi, offset)) {
491 /* return 0 for PBA access */
492 retval = 0;
493 }
494
495 return (retval);
496 }
497
498 int
pci_msix_table_bar(struct pci_devinst * pi)499 pci_msix_table_bar(struct pci_devinst *pi)
500 {
501
502 if (pi->pi_msix.table != NULL)
503 return (pi->pi_msix.table_bar);
504 else
505 return (-1);
506 }
507
508 int
pci_msix_pba_bar(struct pci_devinst * pi)509 pci_msix_pba_bar(struct pci_devinst *pi)
510 {
511
512 if (pi->pi_msix.table != NULL)
513 return (pi->pi_msix.pba_bar);
514 else
515 return (-1);
516 }
517
518 #ifdef __amd64__
519 static int
pci_emul_io_handler(struct vmctx * ctx __unused,int in,int port,int bytes,uint32_t * eax,void * arg)520 pci_emul_io_handler(struct vmctx *ctx __unused, int in, int port,
521 int bytes, uint32_t *eax, void *arg)
522 {
523 struct pci_devinst *pdi = arg;
524 struct pci_devemu *pe = pdi->pi_d;
525 uint64_t offset;
526 int i;
527
528 assert(port >= 0);
529
530 for (i = 0; i <= PCI_BARMAX; i++) {
531 if (pdi->pi_bar[i].type == PCIBAR_IO &&
532 (uint64_t)port >= pdi->pi_bar[i].addr &&
533 (uint64_t)port + bytes <=
534 pdi->pi_bar[i].addr + pdi->pi_bar[i].size) {
535 offset = port - pdi->pi_bar[i].addr;
536 if (in)
537 *eax = (*pe->pe_barread)(pdi, i,
538 offset, bytes);
539 else
540 (*pe->pe_barwrite)(pdi, i, offset,
541 bytes, *eax);
542 return (0);
543 }
544 }
545 return (-1);
546 }
547 #else
/*
 * I/O BAR handler for platforms without a dedicated i/o port space:
 * PCI i/o space is mapped into MMIO, so accesses arrive through the
 * mem_range mechanism.  'arg1' is the device instance and 'arg2' the
 * BAR index, as registered by modify_bar_registration().
 */
static int
pci_emul_iomem_handler(struct vcpu *vcpu __unused, int dir,
    uint64_t addr, int size, uint64_t *val, void *arg1, long arg2)
{
	struct pci_devinst *pdi = arg1;
	struct pci_devemu *pe = pdi->pi_d;
	uint64_t offset;
	int bidx = (int)arg2;

	/* The registered range must lie entirely within the i/o BAR. */
	assert(bidx <= PCI_BARMAX);
	assert(pdi->pi_bar[bidx].type == PCIBAR_IO);
	assert(addr >= pdi->pi_bar[bidx].addr &&
	    addr + size <= pdi->pi_bar[bidx].addr + pdi->pi_bar[bidx].size);
	assert(size == 1 || size == 2 || size == 4);

	/* Forward the access to the device model, relative to the BAR base. */
	offset = addr - pdi->pi_bar[bidx].addr;
	if (dir == MEM_F_READ)
		*val = (*pe->pe_barread)(pdi, bidx, offset, size);
	else
		(*pe->pe_barwrite)(pdi, bidx, offset, size, *val);

	return (0);
}
571 #endif /* !__amd64__ */
572
573 static int
pci_emul_mem_handler(struct vcpu * vcpu __unused,int dir,uint64_t addr,int size,uint64_t * val,void * arg1,long arg2)574 pci_emul_mem_handler(struct vcpu *vcpu __unused, int dir,
575 uint64_t addr, int size, uint64_t *val, void *arg1, long arg2)
576 {
577 struct pci_devinst *pdi = arg1;
578 struct pci_devemu *pe = pdi->pi_d;
579 uint64_t offset;
580 int bidx = (int)arg2;
581
582 assert(bidx <= PCI_BARMAX);
583 assert(pdi->pi_bar[bidx].type == PCIBAR_MEM32 ||
584 pdi->pi_bar[bidx].type == PCIBAR_MEM64);
585 assert(addr >= pdi->pi_bar[bidx].addr &&
586 addr + size <= pdi->pi_bar[bidx].addr + pdi->pi_bar[bidx].size);
587
588 offset = addr - pdi->pi_bar[bidx].addr;
589
590 if (dir == MEM_F_WRITE) {
591 if (size == 8) {
592 (*pe->pe_barwrite)(pdi, bidx, offset,
593 4, *val & 0xffffffff);
594 (*pe->pe_barwrite)(pdi, bidx, offset + 4,
595 4, *val >> 32);
596 } else {
597 (*pe->pe_barwrite)(pdi, bidx, offset,
598 size, *val);
599 }
600 } else {
601 if (size == 8) {
602 *val = (*pe->pe_barread)(pdi, bidx,
603 offset, 4);
604 *val |= (*pe->pe_barread)(pdi, bidx,
605 offset + 4, 4) << 32;
606 } else {
607 *val = (*pe->pe_barread)(pdi, bidx,
608 offset, size);
609 }
610 }
611
612 return (0);
613 }
614
615
/*
 * Carve 'size' bytes (a power of 2), naturally aligned, out of the
 * resource window that starts at '*baseptr' and ends at 'limit'.
 * On success the allocated address is stored in '*addr', '*baseptr'
 * is advanced past it, and 0 is returned; -1 is returned if the
 * window is exhausted.
 */
static int
pci_emul_alloc_resource(uint64_t *baseptr, uint64_t limit, uint64_t size,
    uint64_t *addr)
{
	uint64_t base;

	assert((size & (size - 1)) == 0);	/* must be a power of 2 */

	base = roundup2(*baseptr, size);

	/*
	 * Check 'base + size <= limit' without risking uint64_t
	 * wrap-around: rounding up or adding 'size' near UINT64_MAX
	 * could overflow and spuriously satisfy a naive comparison.
	 */
	if (base < *baseptr || size > limit || base > limit - size)
		return (-1);

	*addr = base;
	*baseptr = base + size;
	return (0);
}
633
634 /*
635 * Register (or unregister) the MMIO or I/O region associated with the BAR
636 * register 'idx' of an emulated pci device.
637 */
static void
modify_bar_registration(struct pci_devinst *pi, int idx, int registration)
{
	struct pci_devemu *pe;
	int error;
	enum pcibar_type type;

	pe = pi->pi_d;
	type = pi->pi_bar[idx].type;
	switch (type) {
	case PCIBAR_IO:
	{
#ifdef __amd64__
		/* x86 has a dedicated i/o port space; use inout handlers. */
		struct inout_port iop;

		bzero(&iop, sizeof(struct inout_port));
		iop.name = pi->pi_name;
		iop.port = pi->pi_bar[idx].addr;
		iop.size = pi->pi_bar[idx].size;
		if (registration) {
			iop.flags = IOPORT_F_INOUT;
			iop.handler = pci_emul_io_handler;
			iop.arg = pi;
			error = register_inout(&iop);
		} else
			error = unregister_inout(&iop);
#else
		/* Other platforms map PCI i/o space into the MMIO space. */
		struct mem_range mr;

		bzero(&mr, sizeof(struct mem_range));
		mr.name = pi->pi_name;
		mr.base = pi->pi_bar[idx].addr;
		mr.size = pi->pi_bar[idx].size;
		if (registration) {
			mr.flags = MEM_F_RW;
			mr.handler = pci_emul_iomem_handler;
			mr.arg1 = pi;
			mr.arg2 = idx;
			error = register_mem(&mr);
		} else
			error = unregister_mem(&mr);
#endif
		break;
	}
	case PCIBAR_MEM32:
	case PCIBAR_MEM64:
	{
		struct mem_range mr;

		bzero(&mr, sizeof(struct mem_range));
		mr.name = pi->pi_name;
		mr.base = pi->pi_bar[idx].addr;
		mr.size = pi->pi_bar[idx].size;
		if (registration) {
			mr.flags = MEM_F_RW;
			mr.handler = pci_emul_mem_handler;
			mr.arg1 = pi;
			mr.arg2 = idx;
			error = register_mem(&mr);
		} else
			error = unregister_mem(&mr);
		break;
	}
	case PCIBAR_ROM:
		/* ROM mappings are managed separately via pci_emul_rombase. */
		error = 0;
		break;
	default:
		error = EINVAL;
		break;
	}
	assert(error == 0);

	/* Let the device model observe the new BAR address, if it cares. */
	if (pe->pe_baraddr != NULL)
		(*pe->pe_baraddr)(pi, idx, registration, pi->pi_bar[idx].addr);
}
713
/* Stop intercepting accesses to BAR 'idx' of device 'pi'. */
static void
unregister_bar(struct pci_devinst *pi, int idx)
{
	modify_bar_registration(pi, idx, 0);
}
720
/* Start intercepting accesses to BAR 'idx' of device 'pi'. */
static void
register_bar(struct pci_devinst *pi, int idx)
{
	modify_bar_registration(pi, idx, 1);
}
727
728 /* Is the ROM enabled for the emulated pci device? */
729 static int
romen(struct pci_devinst * pi)730 romen(struct pci_devinst *pi)
731 {
732 return (pi->pi_bar[PCI_ROM_IDX].lobits & PCIM_BIOS_ENABLE) ==
733 PCIM_BIOS_ENABLE;
734 }
735
736 /* Are we decoding i/o port accesses for the emulated pci device? */
737 static int
porten(struct pci_devinst * pi)738 porten(struct pci_devinst *pi)
739 {
740 uint16_t cmd;
741
742 cmd = pci_get_cfgdata16(pi, PCIR_COMMAND);
743
744 return (cmd & PCIM_CMD_PORTEN);
745 }
746
747 /* Are we decoding memory accesses for the emulated pci device? */
748 static int
memen(struct pci_devinst * pi)749 memen(struct pci_devinst *pi)
750 {
751 uint16_t cmd;
752
753 cmd = pci_get_cfgdata16(pi, PCIR_COMMAND);
754
755 return (cmd & PCIM_CMD_MEMEN);
756 }
757
758 /*
759 * Update the MMIO or I/O address that is decoded by the BAR register.
760 *
761 * If the pci device has enabled the address space decoding then intercept
762 * the address range decoded by the BAR register.
763 */
764 static void
update_bar_address(struct pci_devinst * pi,uint64_t addr,int idx,int type)765 update_bar_address(struct pci_devinst *pi, uint64_t addr, int idx, int type)
766 {
767 int decode;
768
769 if (pi->pi_bar[idx].type == PCIBAR_IO)
770 decode = porten(pi);
771 else
772 decode = memen(pi);
773
774 if (decode)
775 unregister_bar(pi, idx);
776
777 switch (type) {
778 case PCIBAR_IO:
779 case PCIBAR_MEM32:
780 pi->pi_bar[idx].addr = addr;
781 break;
782 case PCIBAR_MEM64:
783 pi->pi_bar[idx].addr &= ~0xffffffffUL;
784 pi->pi_bar[idx].addr |= addr;
785 break;
786 case PCIBAR_MEMHI64:
787 pi->pi_bar[idx].addr &= 0xffffffff;
788 pi->pi_bar[idx].addr |= addr;
789 break;
790 default:
791 assert(0);
792 }
793
794 if (decode)
795 register_bar(pi, idx);
796 }
797
798 int
pci_emul_alloc_bar(struct pci_devinst * pdi,int idx,enum pcibar_type type,uint64_t size)799 pci_emul_alloc_bar(struct pci_devinst *pdi, int idx, enum pcibar_type type,
800 uint64_t size)
801 {
802 assert((type == PCIBAR_ROM) || (idx >= 0 && idx <= PCI_BARMAX));
803 assert((type != PCIBAR_ROM) || (idx == PCI_ROM_IDX));
804
805 if ((size & (size - 1)) != 0)
806 size = 1UL << flsl(size); /* round up to a power of 2 */
807
808 /* Enforce minimum BAR sizes required by the PCI standard */
809 if (type == PCIBAR_IO) {
810 if (size < 4)
811 size = 4;
812 } else if (type == PCIBAR_ROM) {
813 if (size < ~PCIM_BIOS_ADDR_MASK + 1)
814 size = ~PCIM_BIOS_ADDR_MASK + 1;
815 } else {
816 if (size < 16)
817 size = 16;
818 }
819
820 /*
821 * To reduce fragmentation of the MMIO space, we allocate the BARs by
822 * size. Therefore, don't allocate the BAR yet. We create a list of all
823 * BAR allocation which is sorted by BAR size. When all PCI devices are
824 * initialized, we will assign an address to the BARs.
825 */
826
827 /* create a new list entry */
828 struct pci_bar_allocation *const new_bar = malloc(sizeof(*new_bar));
829 memset(new_bar, 0, sizeof(*new_bar));
830 new_bar->pdi = pdi;
831 new_bar->idx = idx;
832 new_bar->type = type;
833 new_bar->size = size;
834
835 /*
836 * Search for a BAR which size is lower than the size of our newly
837 * allocated BAR.
838 */
839 struct pci_bar_allocation *bar = NULL;
840 TAILQ_FOREACH(bar, &pci_bars, chain) {
841 if (bar->size < size) {
842 break;
843 }
844 }
845
846 if (bar == NULL) {
847 /*
848 * Either the list is empty or new BAR is the smallest BAR of
849 * the list. Append it to the end of our list.
850 */
851 TAILQ_INSERT_TAIL(&pci_bars, new_bar, chain);
852 } else {
853 /*
854 * The found BAR is smaller than our new BAR. For that reason,
855 * insert our new BAR before the found BAR.
856 */
857 TAILQ_INSERT_BEFORE(bar, new_bar, chain);
858 }
859
860 /*
861 * Enable PCI BARs only if we don't have a boot ROM, i.e., bhyveload was
862 * used to load the initial guest image. Otherwise, we rely on the boot
863 * ROM to handle this.
864 */
865 if (!get_config_bool_default("pci.enable_bars", !bootrom_boot()))
866 return (0);
867
868 /*
869 * pci_passthru devices synchronize their physical and virtual command
870 * register on init. For that reason, the virtual cmd reg should be
871 * updated as early as possible.
872 */
873 uint16_t enbit = 0;
874 switch (type) {
875 case PCIBAR_IO:
876 enbit = PCIM_CMD_PORTEN;
877 break;
878 case PCIBAR_MEM64:
879 case PCIBAR_MEM32:
880 enbit = PCIM_CMD_MEMEN;
881 break;
882 default:
883 enbit = 0;
884 break;
885 }
886
887 const uint16_t cmd = pci_get_cfgdata16(pdi, PCIR_COMMAND);
888 pci_set_cfgdata16(pdi, PCIR_COMMAND, cmd | enbit);
889
890 return (0);
891 }
892
/*
 * Assign an address to a previously sized BAR and program the guest's
 * config space accordingly.  For decoded BAR types the address is
 * carved out of the matching emulated resource window; the BAR is then
 * registered for interception if the corresponding decode bit in the
 * command register is already set.  Returns 0 on success or the error
 * from pci_emul_alloc_resource().
 */
static int
pci_emul_assign_bar(struct pci_devinst *const pdi, const int idx,
    const enum pcibar_type type, const uint64_t size)
{
	int error;
	uint64_t *baseptr, limit, addr, mask, lobits, bar;

	switch (type) {
	case PCIBAR_NONE:
		/* No backing resource; leave the BAR register zeroed. */
		baseptr = NULL;
		addr = mask = lobits = 0;
		break;
	case PCIBAR_IO:
		baseptr = &pci_emul_iobase;
		limit = PCI_EMUL_IOLIMIT;
		mask = PCIM_BAR_IO_BASE;
		lobits = PCIM_BAR_IO_SPACE;
		break;
	case PCIBAR_MEM64:
		/*
		 * XXX
		 * Some drivers do not work well if the 64-bit BAR is allocated
		 * above 4GB. Allow for this by allocating small requests under
		 * 4GB unless then allocation size is larger than some arbitrary
		 * number (128MB currently).
		 */
		if (size > 128 * 1024 * 1024) {
			baseptr = &pci_emul_membase64;
			limit = pci_emul_memlim64;
			mask = PCIM_BAR_MEM_BASE;
			lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64 |
			    PCIM_BAR_MEM_PREFETCH;
		} else {
			baseptr = &pci_emul_membase32;
			limit = PCI_EMUL_MEMLIMIT32;
			mask = PCIM_BAR_MEM_BASE;
			lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64;
		}
		break;
	case PCIBAR_MEM32:
		baseptr = &pci_emul_membase32;
		limit = PCI_EMUL_MEMLIMIT32;
		mask = PCIM_BAR_MEM_BASE;
		lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_32;
		break;
	case PCIBAR_ROM:
		/* do not claim memory for ROM. OVMF will do it for us. */
		baseptr = NULL;
		limit = 0;
		mask = PCIM_BIOS_ADDR_MASK;
		lobits = 0;
		break;
	default:
		printf("pci_emul_alloc_base: invalid bar type %d\n", type);
		assert(0);
	}

	if (baseptr != NULL) {
		error = pci_emul_alloc_resource(baseptr, limit, size, &addr);
		if (error != 0)
			return (error);
	} else {
		addr = 0;
	}

	pdi->pi_bar[idx].type = type;
	pdi->pi_bar[idx].addr = addr;
	pdi->pi_bar[idx].size = size;
	/*
	 * passthru devices are using same lobits as physical device they set
	 * this property
	 */
	if (pdi->pi_bar[idx].lobits != 0) {
		lobits = pdi->pi_bar[idx].lobits;
	} else {
		pdi->pi_bar[idx].lobits = lobits;
	}

	/* Initialize the BAR register in config space */
	bar = (addr & mask) | lobits;
	pci_set_cfgdata32(pdi, PCIR_BAR(idx), bar);

	/* A 64-bit BAR consumes two registers; the next one holds the high dword. */
	if (type == PCIBAR_MEM64) {
		assert(idx + 1 <= PCI_BARMAX);
		pdi->pi_bar[idx + 1].type = PCIBAR_MEMHI64;
		pci_set_cfgdata32(pdi, PCIR_BAR(idx + 1), bar >> 32);
	}

	/* Begin intercepting accesses if decoding is already enabled. */
	switch (type) {
	case PCIBAR_IO:
		if (porten(pdi))
			register_bar(pdi, idx);
		break;
	case PCIBAR_MEM32:
	case PCIBAR_MEM64:
	case PCIBAR_MEMHI64:
		if (memen(pdi))
			register_bar(pdi, idx);
		break;
	default:
		break;
	}

	return (0);
}
998
999 int
pci_emul_alloc_rom(struct pci_devinst * const pdi,const uint64_t size,void ** const addr)1000 pci_emul_alloc_rom(struct pci_devinst *const pdi, const uint64_t size,
1001 void **const addr)
1002 {
1003 /* allocate ROM space once on first call */
1004 if (pci_emul_rombase == 0) {
1005 pci_emul_rombase = vm_create_devmem(pdi->pi_vmctx, VM_PCIROM,
1006 "pcirom", PCI_EMUL_ROMSIZE);
1007 if (pci_emul_rombase == MAP_FAILED) {
1008 warnx("%s: failed to create rom segment", __func__);
1009 return (-1);
1010 }
1011 pci_emul_romlim = pci_emul_rombase + PCI_EMUL_ROMSIZE;
1012 pci_emul_romoffset = 0;
1013 }
1014
1015 /* ROM size should be a power of 2 and greater than 2 KB */
1016 const uint64_t rom_size = MAX(1UL << flsl(size),
1017 ~PCIM_BIOS_ADDR_MASK + 1);
1018
1019 /* check if ROM fits into ROM space */
1020 if (pci_emul_romoffset + rom_size > PCI_EMUL_ROMSIZE) {
1021 warnx("%s: no space left in rom segment:", __func__);
1022 warnx("%16lu bytes left",
1023 PCI_EMUL_ROMSIZE - pci_emul_romoffset);
1024 warnx("%16lu bytes required by %d/%d/%d", rom_size, pdi->pi_bus,
1025 pdi->pi_slot, pdi->pi_func);
1026 return (-1);
1027 }
1028
1029 /* allocate ROM BAR */
1030 const int error = pci_emul_alloc_bar(pdi, PCI_ROM_IDX, PCIBAR_ROM,
1031 rom_size);
1032 if (error)
1033 return error;
1034
1035 /* return address */
1036 *addr = pci_emul_rombase + pci_emul_romoffset;
1037
1038 /* save offset into ROM Space */
1039 pdi->pi_romoffset = pci_emul_romoffset;
1040
1041 /* increase offset for next ROM */
1042 pci_emul_romoffset += rom_size;
1043
1044 return (0);
1045 }
1046
1047 int
pci_emul_add_boot_device(struct pci_devinst * pi,int bootindex)1048 pci_emul_add_boot_device(struct pci_devinst *pi, int bootindex)
1049 {
1050 struct boot_device *new_device, *device;
1051
1052 /* don't permit a negative bootindex */
1053 if (bootindex < 0) {
1054 errx(4, "Invalid bootindex %d for %s", bootindex, pi->pi_name);
1055 }
1056
1057 /* alloc new boot device */
1058 new_device = calloc(1, sizeof(struct boot_device));
1059 if (new_device == NULL) {
1060 return (ENOMEM);
1061 }
1062 new_device->pdi = pi;
1063 new_device->bootindex = bootindex;
1064
1065 /* search for boot device with higher boot index */
1066 TAILQ_FOREACH(device, &boot_devices, boot_device_chain) {
1067 if (device->bootindex == bootindex) {
1068 errx(4,
1069 "Could not set bootindex %d for %s. Bootindex already occupied by %s",
1070 bootindex, pi->pi_name, device->pdi->pi_name);
1071 } else if (device->bootindex > bootindex) {
1072 break;
1073 }
1074 }
1075
1076 /* add boot device to queue */
1077 if (device == NULL) {
1078 TAILQ_INSERT_TAIL(&boot_devices, new_device, boot_device_chain);
1079 } else {
1080 TAILQ_INSERT_BEFORE(device, new_device, boot_device_chain);
1081 }
1082
1083 return (0);
1084 }
1085
#define CAP_START_OFFSET 0x40
/*
 * Append a capability structure to the device's capability list.
 * 'capdata' holds the raw capability ('caplen' bytes); its second byte
 * (the "next" pointer) is overwritten to terminate the chain, and the
 * previous capability (or PCIR_CAP_PTR for the first one) is linked to
 * it.  Returns 0 on success and -1 if the capability does not fit in
 * the remaining config space.
 */
static int
pci_emul_add_capability(struct pci_devinst *pi, u_char *capdata, int caplen)
{
	int i, capoff, reallen;
	uint16_t sts;

	assert(caplen > 0);

	reallen = roundup2(caplen, 4);		/* dword aligned */

	/* Place the first capability at CAP_START_OFFSET, later ones after the last. */
	sts = pci_get_cfgdata16(pi, PCIR_STATUS);
	if ((sts & PCIM_STATUS_CAPPRESENT) == 0)
		capoff = CAP_START_OFFSET;
	else
		capoff = pi->pi_capend + 1;

	/* Check if we have enough space */
	if (capoff + reallen > PCI_REGMAX + 1)
		return (-1);

	/* Set the previous capability pointer */
	if ((sts & PCIM_STATUS_CAPPRESENT) == 0) {
		pci_set_cfgdata8(pi, PCIR_CAP_PTR, capoff);
		pci_set_cfgdata16(pi, PCIR_STATUS, sts|PCIM_STATUS_CAPPRESENT);
	} else
		pci_set_cfgdata8(pi, pi->pi_prevcap + 1, capoff);

	/* Copy the capability */
	for (i = 0; i < caplen; i++)
		pci_set_cfgdata8(pi, capoff + i, capdata[i]);

	/* Set the next capability pointer */
	pci_set_cfgdata8(pi, capoff + 1, 0);

	pi->pi_prevcap = capoff;
	pi->pi_capend = capoff + reallen - 1;
	return (0);
}
1125
1126 static struct pci_devemu *
pci_emul_finddev(const char * name)1127 pci_emul_finddev(const char *name)
1128 {
1129 struct pci_devemu **pdpp, *pdp;
1130
1131 SET_FOREACH(pdpp, pci_devemu_set) {
1132 pdp = *pdpp;
1133 if (!strcmp(pdp->pe_emu, name)) {
1134 return (pdp);
1135 }
1136 }
1137
1138 return (NULL);
1139 }
1140
1141 static int
pci_emul_init(struct vmctx * ctx,struct pci_devemu * pde,int bus,int slot,int func,struct funcinfo * fi)1142 pci_emul_init(struct vmctx *ctx, struct pci_devemu *pde, int bus, int slot,
1143 int func, struct funcinfo *fi)
1144 {
1145 struct pci_devinst *pdi;
1146 int err;
1147
1148 pdi = calloc(1, sizeof(struct pci_devinst));
1149
1150 pdi->pi_vmctx = ctx;
1151 pdi->pi_bus = bus;
1152 pdi->pi_slot = slot;
1153 pdi->pi_func = func;
1154 pthread_mutex_init(&pdi->pi_lintr.lock, NULL);
1155 pdi->pi_lintr.pin = 0;
1156 pdi->pi_lintr.state = IDLE;
1157 pci_irq_init_irq(&pdi->pi_lintr.irq);
1158 pdi->pi_d = pde;
1159 snprintf(pdi->pi_name, PI_NAMESZ, "%s@pci.%d.%d.%d", pde->pe_emu, bus,
1160 slot, func);
1161
1162 /* Disable legacy interrupts */
1163 pci_set_cfgdata8(pdi, PCIR_INTLINE, 255);
1164 pci_set_cfgdata8(pdi, PCIR_INTPIN, 0);
1165
1166 if (get_config_bool_default("pci.enable_bars", !bootrom_boot()))
1167 pci_set_cfgdata8(pdi, PCIR_COMMAND, PCIM_CMD_BUSMASTEREN);
1168
1169 err = (*pde->pe_init)(pdi, fi->fi_config);
1170 if (err == 0)
1171 fi->fi_devi = pdi;
1172 else
1173 free(pdi);
1174
1175 return (err);
1176 }
1177
1178 void
pci_populate_msicap(struct msicap * msicap,int msgnum,int nextptr)1179 pci_populate_msicap(struct msicap *msicap, int msgnum, int nextptr)
1180 {
1181 int mmc;
1182
1183 /* Number of msi messages must be a power of 2 between 1 and 32 */
1184 assert((msgnum & (msgnum - 1)) == 0 && msgnum >= 1 && msgnum <= 32);
1185 mmc = ffs(msgnum) - 1;
1186
1187 bzero(msicap, sizeof(struct msicap));
1188 msicap->capid = PCIY_MSI;
1189 msicap->nextptr = nextptr;
1190 msicap->msgctrl = PCIM_MSICTRL_64BIT | (mmc << 1);
1191 }
1192
1193 int
pci_emul_add_msicap(struct pci_devinst * pi,int msgnum)1194 pci_emul_add_msicap(struct pci_devinst *pi, int msgnum)
1195 {
1196 struct msicap msicap;
1197
1198 pci_populate_msicap(&msicap, msgnum, 0);
1199
1200 return (pci_emul_add_capability(pi, (u_char *)&msicap, sizeof(msicap)));
1201 }
1202
/*
 * Fill in an MSI-X capability structure for 'msgnum' table entries whose
 * table and PBA both live in BAR 'barnum'.  'msix_tab_size' is the
 * 4K-aligned size of the MSI-X table; the PBA is placed immediately after
 * it in the same BAR.
 */
static void
pci_populate_msixcap(struct msixcap *msixcap, int msgnum, int barnum,
    uint32_t msix_tab_size)
{

	assert(msix_tab_size % 4096 == 0);

	bzero(msixcap, sizeof(struct msixcap));
	msixcap->capid = PCIY_MSIX;

	/*
	 * Message Control Register, all fields set to
	 * zero except for the Table Size.
	 * Note: Table size N is encoded as N-1
	 */
	msixcap->msgctrl = msgnum - 1;

	/*
	 * MSI-X BAR setup:
	 * - MSI-X table start at offset 0
	 * - PBA table starts at a 4K aligned offset after the MSI-X table
	 */
	msixcap->table_info = barnum & PCIM_MSIX_BIR_MASK;
	msixcap->pba_info = msix_tab_size | (barnum & PCIM_MSIX_BIR_MASK);
}
1228
1229 static void
pci_msix_table_init(struct pci_devinst * pi,int table_entries)1230 pci_msix_table_init(struct pci_devinst *pi, int table_entries)
1231 {
1232 int i, table_size;
1233
1234 assert(table_entries > 0);
1235 assert(table_entries <= MAX_MSIX_TABLE_ENTRIES);
1236
1237 table_size = table_entries * MSIX_TABLE_ENTRY_SIZE;
1238 pi->pi_msix.table = calloc(1, table_size);
1239
1240 /* set mask bit of vector control register */
1241 for (i = 0; i < table_entries; i++)
1242 pi->pi_msix.table[i].vector_control |= PCIM_MSIX_VCTRL_MASK;
1243 }
1244
/*
 * Add MSI-X support to a device: set up the pi_msix bookkeeping, allocate
 * the table shadow, create a 32-bit memory BAR holding the table followed
 * by the PBA, and append the MSI-X capability to the config space.
 *
 * Returns 0 on success or -1 if the capability does not fit.
 */
int
pci_emul_add_msixcap(struct pci_devinst *pi, int msgnum, int barnum)
{
	uint32_t tab_size;
	struct msixcap msixcap;

	assert(msgnum >= 1 && msgnum <= MAX_MSIX_TABLE_ENTRIES);
	assert(barnum >= 0 && barnum <= PCIR_MAX_BAR_0);

	tab_size = msgnum * MSIX_TABLE_ENTRY_SIZE;

	/* Align table size to nearest 4K */
	tab_size = roundup2(tab_size, 4096);

	/* Table at offset 0, PBA immediately after, both in 'barnum'. */
	pi->pi_msix.table_bar = barnum;
	pi->pi_msix.pba_bar = barnum;
	pi->pi_msix.table_offset = 0;
	pi->pi_msix.table_count = msgnum;
	pi->pi_msix.pba_offset = tab_size;
	pi->pi_msix.pba_size = PBA_SIZE(msgnum);

	pci_msix_table_init(pi, msgnum);

	pci_populate_msixcap(&msixcap, msgnum, barnum, tab_size);

	/* allocate memory for MSI-X Table and PBA */
	pci_emul_alloc_bar(pi, barnum, PCIBAR_MEM32,
	    tab_size + pi->pi_msix.pba_size);

	return (pci_emul_add_capability(pi, (u_char *)&msixcap,
	    sizeof(msixcap)));
}
1277
/*
 * Handle a guest write within the MSI-X capability at 'capoff'.  Only the
 * enable and function-mask bits of the message control register are
 * writable; everything else is passed through to config space unchanged.
 */
static void
msixcap_cfgwrite(struct pci_devinst *pi, int capoff, int offset,
    int bytes, uint32_t val)
{
	uint16_t msgctrl, rwmask;
	int off;

	off = offset - capoff;
	/* Message Control Register */
	if (off == 2 && bytes == 2) {
		/* Preserve read-only bits; merge in writable ones. */
		rwmask = PCIM_MSIXCTRL_MSIX_ENABLE | PCIM_MSIXCTRL_FUNCTION_MASK;
		msgctrl = pci_get_cfgdata16(pi, offset);
		msgctrl &= ~rwmask;
		msgctrl |= val & rwmask;
		val = msgctrl;

		pi->pi_msix.enabled = val & PCIM_MSIXCTRL_MSIX_ENABLE;
		pi->pi_msix.function_mask = val & PCIM_MSIXCTRL_FUNCTION_MASK;
		/* Enabling MSI-X may mask a pending INTx (and vice versa). */
		pci_lintr_update(pi);
	}

	CFGWRITE(pi, offset, val, bytes);
}
1301
/*
 * Handle a guest write within the MSI capability at 'capoff'.  After the
 * (filtered) write lands in config space, the cached MSI state -- enable,
 * address, data and message count -- is re-derived from the registers.
 */
static void
msicap_cfgwrite(struct pci_devinst *pi, int capoff, int offset,
    int bytes, uint32_t val)
{
	uint16_t msgctrl, rwmask, msgdata, mme;
	uint32_t addrlo;

	/*
	 * If guest is writing to the message control register make sure
	 * we do not overwrite read-only fields.
	 */
	if ((offset - capoff) == 2 && bytes == 2) {
		rwmask = PCIM_MSICTRL_MME_MASK | PCIM_MSICTRL_MSI_ENABLE;
		msgctrl = pci_get_cfgdata16(pi, offset);
		msgctrl &= ~rwmask;
		msgctrl |= val & rwmask;
		val = msgctrl;
	}
	CFGWRITE(pi, offset, val, bytes);

	/* Re-read the capability; the data register moves if 64-bit capable. */
	msgctrl = pci_get_cfgdata16(pi, capoff + 2);
	addrlo = pci_get_cfgdata32(pi, capoff + 4);
	if (msgctrl & PCIM_MSICTRL_64BIT)
		msgdata = pci_get_cfgdata16(pi, capoff + 12);
	else
		msgdata = pci_get_cfgdata16(pi, capoff + 8);

	mme = msgctrl & PCIM_MSICTRL_MME_MASK;
	pi->pi_msi.enabled = msgctrl & PCIM_MSICTRL_MSI_ENABLE ? 1 : 0;
	if (pi->pi_msi.enabled) {
		pi->pi_msi.addr = addrlo;
		pi->pi_msi.msg_data = msgdata;
		/* MME field holds log2 of the enabled message count. */
		pi->pi_msi.maxmsgnum = 1 << (mme >> 4);
	} else {
		pi->pi_msi.maxmsgnum = 0;
	}
	/* Enabling MSI may mask a pending INTx (and vice versa). */
	pci_lintr_update(pi);
}
1340
/*
 * Handle a guest write within the PCI Express capability.  Currently a
 * straight pass-through to config space.
 */
static void
pciecap_cfgwrite(struct pci_devinst *pi, int capoff __unused, int offset,
    int bytes, uint32_t val)
{

	/* XXX don't write to the readonly parts */
	CFGWRITE(pi, offset, val, bytes);
}
1349
#define PCIECAP_VERSION 0x2
/*
 * Append a PCI Express capability of the given PCIEM_TYPE_* device/port
 * type.  Returns 0 on success or -1 if the capability does not fit.
 */
int
pci_emul_add_pciecap(struct pci_devinst *pi, int type)
{
	int err;
	struct pciecap pciecap;

	bzero(&pciecap, sizeof(pciecap));

	/*
	 * Use the integrated endpoint type for endpoints on a root complex bus.
	 *
	 * NB: bhyve currently only supports a single PCI bus that is the root
	 * complex bus, so all endpoints are integrated.
	 */
	if ((type == PCIEM_TYPE_ENDPOINT) && (pi->pi_bus == 0))
		type = PCIEM_TYPE_ROOT_INT_EP;

	pciecap.capid = PCIY_EXPRESS;
	pciecap.pcie_capabilities = PCIECAP_VERSION | type;
	if (type != PCIEM_TYPE_ROOT_INT_EP) {
		/* Integrated endpoints have no link registers. */
		pciecap.link_capabilities = 0x411;	/* gen1, x1 */
		pciecap.link_status = 0x11;	/* gen1, x1 */
	}

	err = pci_emul_add_capability(pi, (u_char *)&pciecap, sizeof(pciecap));
	return (err);
}
1378
/*
 * This function assumes that 'coff' is in the capabilities region of the
 * config space. A capoff parameter of zero will force a search for the
 * offset and type.
 *
 * Dispatches the write to the per-capability handler (MSI, MSI-X, PCIe);
 * writes to unrecognized capabilities are dropped.  The capability ID and
 * next-pointer bytes are read-only: a 4-byte write covering them is
 * trimmed to the upper 2 bytes, smaller writes to them are discarded.
 */
void
pci_emul_capwrite(struct pci_devinst *pi, int offset, int bytes, uint32_t val,
    uint8_t capoff, int capid)
{
	uint8_t nextoff;

	/* Do not allow un-aligned writes */
	if ((offset & (bytes - 1)) != 0)
		return;

	if (capoff == 0) {
		/* Find the capability that we want to update */
		capoff = CAP_START_OFFSET;
		while (1) {
			nextoff = pci_get_cfgdata8(pi, capoff + 1);
			if (nextoff == 0)
				break;
			if (offset >= capoff && offset < nextoff)
				break;

			capoff = nextoff;
		}
		assert(offset >= capoff);
		capid = pci_get_cfgdata8(pi, capoff);
	}

	/*
	 * Capability ID and Next Capability Pointer are readonly.
	 * However, some o/s's do 4-byte writes that include these.
	 * For this case, trim the write back to 2 bytes and adjust
	 * the data.
	 */
	if (offset == capoff || offset == capoff + 1) {
		if (offset == capoff && bytes == 4) {
			bytes = 2;
			offset += 2;
			val >>= 16;
		} else
			return;
	}

	switch (capid) {
	case PCIY_MSI:
		msicap_cfgwrite(pi, capoff, offset, bytes, val);
		break;
	case PCIY_MSIX:
		msixcap_cfgwrite(pi, capoff, offset, bytes, val);
		break;
	case PCIY_EXPRESS:
		pciecap_cfgwrite(pi, capoff, offset, bytes, val);
		break;
	default:
		break;
	}
}
1439
1440 static int
pci_emul_iscap(struct pci_devinst * pi,int offset)1441 pci_emul_iscap(struct pci_devinst *pi, int offset)
1442 {
1443 uint16_t sts;
1444
1445 sts = pci_get_cfgdata16(pi, PCIR_STATUS);
1446 if ((sts & PCIM_STATUS_CAPPRESENT) != 0) {
1447 if (offset >= CAP_START_OFFSET && offset <= pi->pi_capend)
1448 return (1);
1449 }
1450 return (0);
1451 }
1452
1453 static int
pci_emul_fallback_handler(struct vcpu * vcpu __unused,int dir,uint64_t addr __unused,int size __unused,uint64_t * val,void * arg1 __unused,long arg2 __unused)1454 pci_emul_fallback_handler(struct vcpu *vcpu __unused, int dir,
1455 uint64_t addr __unused, int size __unused, uint64_t *val,
1456 void *arg1 __unused, long arg2 __unused)
1457 {
1458 /*
1459 * Ignore writes; return 0xff's for reads. The mem read code
1460 * will take care of truncating to the correct size.
1461 */
1462 if (dir == MEM_F_READ) {
1463 *val = 0xffffffffffffffff;
1464 }
1465
1466 return (0);
1467 }
1468
1469 static int
pci_emul_ecfg_handler(struct vcpu * vcpu __unused,int dir,uint64_t addr,int bytes,uint64_t * val,void * arg1 __unused,long arg2 __unused)1470 pci_emul_ecfg_handler(struct vcpu *vcpu __unused, int dir, uint64_t addr,
1471 int bytes, uint64_t *val, void *arg1 __unused, long arg2 __unused)
1472 {
1473 int bus, slot, func, coff, in;
1474
1475 coff = addr & 0xfff;
1476 func = (addr >> 12) & 0x7;
1477 slot = (addr >> 15) & 0x1f;
1478 bus = (addr >> 20) & 0xff;
1479 in = (dir == MEM_F_READ);
1480 if (in)
1481 *val = ~0UL;
1482 pci_cfgrw(in, bus, slot, func, coff, bytes, (uint32_t *)val);
1483 return (0);
1484 }
1485
/* Return the guest-physical base address of the PCI ECAM window. */
uint64_t
pci_ecfg_base(void)
{

	return (PCI_EMUL_ECFG_BASE);
}
1492
1493 static int
init_bootorder(void)1494 init_bootorder(void)
1495 {
1496 struct boot_device *device;
1497 FILE *fp;
1498 char *bootorder;
1499 size_t bootorder_len;
1500
1501 if (TAILQ_EMPTY(&boot_devices))
1502 return (0);
1503
1504 fp = open_memstream(&bootorder, &bootorder_len);
1505 TAILQ_FOREACH(device, &boot_devices, boot_device_chain) {
1506 fprintf(fp, "/pci@i0cf8/pci@%d,%d\n",
1507 device->pdi->pi_slot, device->pdi->pi_func);
1508 }
1509 fclose(fp);
1510
1511 return (qemu_fwcfg_add_file("bootorder", bootorder_len, bootorder));
1512 }
1513
/* Per-bus slop added to each resource window so guests can reprogram BARs. */
#define	BUSIO_ROUNDUP		32
#define	BUSMEM32_ROUNDUP	(1024 * 1024)
#define	BUSMEM64_ROUNDUP	(512 * 1024 * 1024)

/*
 * Top-level PCI initialization: for every configured bus, instantiate the
 * configured device emulations, assign BARs, route INTx interrupts, publish
 * the boot order, and register fallback/ECAM memory handlers.
 *
 * Returns 0 on success or an errno value; exits on an invalid lowmem limit.
 */
int
init_pci(struct vmctx *ctx)
{
	char node_name[sizeof("pci.XXX.XX.X")];
	struct mem_range mr;
	struct pci_devemu *pde;
	struct businfo *bi;
	struct slotinfo *si;
	struct funcinfo *fi;
	nvlist_t *nvl;
	const char *emul;
	size_t lowmem;
	int bus, slot, func;
	int error;

	/* Guest RAM below 4GB must not overlap the 32-bit BAR window. */
	if (vm_get_lowmem_limit(ctx) > PCI_EMUL_MEMBASE32)
		errx(EX_OSERR, "Invalid lowmem limit");

	pci_emul_iobase = PCI_EMUL_IOBASE;
	pci_emul_membase32 = PCI_EMUL_MEMBASE32;

	/* 64-bit BARs are allocated above guest RAM, PCI_EMUL_MEMSIZE64-aligned. */
	pci_emul_membase64 = vm_get_highmem_base(ctx) +
	    vm_get_highmem_size(ctx);
	pci_emul_membase64 = roundup2(pci_emul_membase64, PCI_EMUL_MEMSIZE64);
	pci_emul_memlim64 = pci_emul_membase64 + PCI_EMUL_MEMSIZE64;

	TAILQ_INIT(&boot_devices);

	for (bus = 0; bus < MAXBUSES; bus++) {
		snprintf(node_name, sizeof(node_name), "pci.%d", bus);
		nvl = find_config_node(node_name);
		if (nvl == NULL)
			continue;
		pci_businfo[bus] = calloc(1, sizeof(struct businfo));
		bi = pci_businfo[bus];

		/*
		 * Keep track of the i/o and memory resources allocated to
		 * this bus.
		 */
		bi->iobase = pci_emul_iobase;
		bi->membase32 = pci_emul_membase32;
		bi->membase64 = pci_emul_membase64;

		/* first run: init devices */
		for (slot = 0; slot < MAXSLOTS; slot++) {
			si = &bi->slotinfo[slot];
			for (func = 0; func < MAXFUNCS; func++) {
				fi = &si->si_funcs[func];
				snprintf(node_name, sizeof(node_name),
				    "pci.%d.%d.%d", bus, slot, func);
				nvl = find_config_node(node_name);
				if (nvl == NULL)
					continue;

				fi->fi_config = nvl;
				emul = get_config_value_node(nvl, "device");
				if (emul == NULL) {
					EPRINTLN("pci slot %d:%d:%d: missing "
					    "\"device\" value", bus, slot, func);
					return (EINVAL);
				}
				pde = pci_emul_finddev(emul);
				if (pde == NULL) {
					EPRINTLN("pci slot %d:%d:%d: unknown "
					    "device \"%s\"", bus, slot, func,
					    emul);
					return (EINVAL);
				}
				if (pde->pe_alias != NULL) {
					EPRINTLN("pci slot %d:%d:%d: legacy "
					    "device \"%s\", use \"%s\" instead",
					    bus, slot, func, emul,
					    pde->pe_alias);
					return (EINVAL);
				}
				fi->fi_pde = pde;
				error = pci_emul_init(ctx, pde, bus, slot,
				    func, fi);
				if (error)
					return (error);
			}
		}

		/* second run: assign BARs and free list */
		struct pci_bar_allocation *bar;
		struct pci_bar_allocation *bar_tmp;
		TAILQ_FOREACH_SAFE(bar, &pci_bars, chain, bar_tmp) {
			pci_emul_assign_bar(bar->pdi, bar->idx, bar->type,
			    bar->size);
			free(bar);
		}
		TAILQ_INIT(&pci_bars);

		/*
		 * Add some slop to the I/O and memory resources decoded by
		 * this bus to give a guest some flexibility if it wants to
		 * reprogram the BARs.
		 */
		pci_emul_iobase += BUSIO_ROUNDUP;
		pci_emul_iobase = roundup2(pci_emul_iobase, BUSIO_ROUNDUP);
		bi->iolimit = pci_emul_iobase;

		pci_emul_membase32 += BUSMEM32_ROUNDUP;
		pci_emul_membase32 = roundup2(pci_emul_membase32,
		    BUSMEM32_ROUNDUP);
		bi->memlimit32 = pci_emul_membase32;

		pci_emul_membase64 += BUSMEM64_ROUNDUP;
		pci_emul_membase64 = roundup2(pci_emul_membase64,
		    BUSMEM64_ROUNDUP);
		bi->memlimit64 = pci_emul_membase64;
	}

	/*
	 * PCI backends are initialized before routing INTx interrupts
	 * so that LPC devices are able to reserve ISA IRQs before
	 * routing PIRQ pins.
	 */
	for (bus = 0; bus < MAXBUSES; bus++) {
		if ((bi = pci_businfo[bus]) == NULL)
			continue;

		for (slot = 0; slot < MAXSLOTS; slot++) {
			si = &bi->slotinfo[slot];
			for (func = 0; func < MAXFUNCS; func++) {
				fi = &si->si_funcs[func];
				if (fi->fi_devi == NULL)
					continue;
				pci_lintr_route(fi->fi_devi);
			}
		}
	}
#ifdef __amd64__
	lpc_pirq_routed();
#endif

	if ((error = init_bootorder()) != 0) {
		warnx("%s: Unable to init bootorder", __func__);
		return (error);
	}

	/*
	 * The guest physical memory map looks like the following on amd64:
	 * [0,		    lowmem)		guest system memory
	 * [lowmem,	    0xC0000000)		memory hole (may be absent)
	 * [0xC0000000,     0xE0000000)		PCI hole (32-bit BAR allocation)
	 * [0xE0000000,	    0xF0000000)		PCI extended config window
	 * [0xF0000000,	    4GB)		LAPIC, IOAPIC, HPET, firmware
	 * [4GB,	    4GB + highmem)	guest system memory
	 * [roundup(4GB + highmem, 32GB), ...)	PCI 64-bit BAR allocation
	 *
	 * On arm64 the guest physical memory map looks like this:
	 * [0x0DF00000,	    0x10000000)		PCI I/O memory
	 * [0xA0000000,	    0xE0000000)		PCI 32-bit BAR allocation
	 * [0xE0000000,	    0xF0000000)		PCI extended config window
	 * [4GB,	    4GB + highmem)	guest system memory
	 * [roundup(4GB + highmem, 32GB), ...)	PCI 64-bit BAR allocation
	 *
	 * "lowmem" is guest memory below 0xC0000000.  amd64 guests provisioned
	 * with less than 3GB of RAM will have no memory above the 4GB boundary.
	 * System memory for arm64 guests is all above the 4GB boundary.
	 */

	/*
	 * Accesses to memory addresses that are not allocated to system
	 * memory or PCI devices return 0xff's.
	 */
	lowmem = vm_get_lowmem_size(ctx);
	bzero(&mr, sizeof(struct mem_range));
	mr.name = "PCI hole";
	mr.flags = MEM_F_RW | MEM_F_IMMUTABLE;
	mr.base = lowmem;
	mr.size = (4ULL * 1024 * 1024 * 1024) - lowmem;
	mr.handler = pci_emul_fallback_handler;
	error = register_mem_fallback(&mr);
	assert(error == 0);

	/* PCI extended config space */
	bzero(&mr, sizeof(struct mem_range));
	mr.name = "PCI ECFG";
	mr.flags = MEM_F_RW | MEM_F_IMMUTABLE;
	mr.base = PCI_EMUL_ECFG_BASE;
	mr.size = PCI_EMUL_ECFG_SIZE;
	mr.handler = pci_emul_ecfg_handler;
	error = register_mem(&mr);
	assert(error == 0);

	return (0);
}
1708
1709 #ifdef __amd64__
/*
 * Emit one _PRT (PCI routing table) package entry for APIC mode, mapping
 * a slot/pin to its I/O APIC interrupt.  Callback for pci_walk_lintr().
 */
static void
pci_apic_prt_entry(int bus __unused, int slot, int pin, struct pci_irq *irq,
    void *arg __unused)
{

	dsdt_line("  Package ()");
	dsdt_line("  {");
	/* ACPI address: slot in the high word, 0xffff = any function. */
	dsdt_line("    0x%X,", slot << 16 | 0xffff);
	/* ACPI pins are 0-based (0 = INTA#). */
	dsdt_line("    0x%02X,", pin - 1);
	dsdt_line("    Zero,");
	dsdt_line("    0x%X", irq->ioapic_irq);
	dsdt_line("  },");
}
1723
/*
 * Emit one _PRT package entry for legacy PIC mode, mapping a slot/pin to
 * a PIRQ link device.  Callback for pci_walk_lintr().  Entries whose PIRQ
 * pin has no link device name are skipped.
 */
static void
pci_pirq_prt_entry(int bus __unused, int slot, int pin, struct pci_irq *irq,
    void *arg __unused)
{
	char *name;

	/* lpc_pirq_name() allocates; freed below. */
	name = lpc_pirq_name(irq->pirq_pin);
	if (name == NULL)
		return;
	dsdt_line("  Package ()");
	dsdt_line("  {");
	/* ACPI address: slot in the high word, 0xffff = any function. */
	dsdt_line("    0x%X,", slot << 16 | 0xffff);
	dsdt_line("    0x%02X,", pin - 1);
	dsdt_line("    %s,", name);
	dsdt_line("    0x00");
	dsdt_line("  },");
	free(name);
}
1742 #endif
1743
/*
 * A bhyve virtual machine has a flat PCI hierarchy with a root port
 * corresponding to each PCI bus.
 *
 * Emit the ACPI DSDT device node for one bus: _HID/_BBN, a _CRS resource
 * template covering the bus number and its I/O and 32/64-bit MMIO windows,
 * the interrupt routing tables (amd64), and each device's own DSDT hooks.
 */
static void
pci_bus_write_dsdt(int bus)
{
	struct businfo *bi;
	struct slotinfo *si;
	struct pci_devinst *pi;
	int func, slot;

	/*
	 * If there are no devices on this 'bus' then just return.
	 */
	if ((bi = pci_businfo[bus]) == NULL) {
		/*
		 * Bus 0 is special because it decodes the I/O ports used
		 * for PCI config space access even if there are no devices
		 * on it.
		 */
		if (bus != 0)
			return;
	}

	dsdt_line("  Device (PC%02X)", bus);
	dsdt_line("  {");
	dsdt_line("    Name (_HID, EisaId (\"PNP0A03\"))");

	dsdt_line("    Method (_BBN, 0, NotSerialized)");
	dsdt_line("    {");
	dsdt_line("        Return (0x%08X)", bus);
	dsdt_line("    }");
	dsdt_line("    Name (_CRS, ResourceTemplate ()");
	dsdt_line("    {");
	dsdt_line("      WordBusNumber (ResourceProducer, MinFixed, "
	    "MaxFixed, PosDecode,");
	dsdt_line("        0x0000,             // Granularity");
	dsdt_line("        0x%04X,             // Range Minimum", bus);
	dsdt_line("        0x%04X,             // Range Maximum", bus);
	dsdt_line("        0x0000,             // Translation Offset");
	dsdt_line("        0x0001,             // Length");
	dsdt_line("        ,, )");

#ifdef __amd64__
	if (bus == 0) {
		/* Bus 0 decodes the config-access ports and legacy I/O. */
		dsdt_indent(3);
		dsdt_fixed_ioport(0xCF8, 8);
		dsdt_unindent(3);

		dsdt_line("      WordIO (ResourceProducer, MinFixed, MaxFixed, "
		    "PosDecode, EntireRange,");
		dsdt_line("        0x0000,             // Granularity");
		dsdt_line("        0x0000,             // Range Minimum");
		dsdt_line("        0x0CF7,             // Range Maximum");
		dsdt_line("        0x0000,             // Translation Offset");
		dsdt_line("        0x0CF8,             // Length");
		dsdt_line("        ,, , TypeStatic)");

		dsdt_line("      WordIO (ResourceProducer, MinFixed, MaxFixed, "
		    "PosDecode, EntireRange,");
		dsdt_line("        0x0000,             // Granularity");
		dsdt_line("        0x0D00,             // Range Minimum");
		dsdt_line("        0x%04X,             // Range Maximum",
		    PCI_EMUL_IOBASE - 1);
		dsdt_line("        0x0000,             // Translation Offset");
		dsdt_line("        0x%04X,             // Length",
		    PCI_EMUL_IOBASE - 0x0D00);
		dsdt_line("        ,, , TypeStatic)");

		/* Empty bus 0: close the template and skip device windows. */
		if (bi == NULL) {
			dsdt_line("    })");
			goto done;
		}
	}
#endif
	assert(bi != NULL);

	/* i/o window */
	dsdt_line("      WordIO (ResourceProducer, MinFixed, MaxFixed, "
	    "PosDecode, EntireRange,");
	dsdt_line("        0x0000,             // Granularity");
	dsdt_line("        0x%04X,             // Range Minimum", bi->iobase);
	dsdt_line("        0x%04X,             // Range Maximum",
	    bi->iolimit - 1);
	dsdt_line("        0x0000,             // Translation Offset");
	dsdt_line("        0x%04X,             // Length",
	    bi->iolimit - bi->iobase);
	dsdt_line("        ,, , TypeStatic)");

	/* mmio window (32-bit) */
	dsdt_line("      DWordMemory (ResourceProducer, PosDecode, "
	    "MinFixed, MaxFixed, NonCacheable, ReadWrite,");
	dsdt_line("        0x00000000,         // Granularity");
	dsdt_line("        0x%08X,         // Range Minimum\n", bi->membase32);
	dsdt_line("        0x%08X,         // Range Maximum\n",
	    bi->memlimit32 - 1);
	dsdt_line("        0x00000000,         // Translation Offset");
	dsdt_line("        0x%08X,         // Length\n",
	    bi->memlimit32 - bi->membase32);
	dsdt_line("        ,, , AddressRangeMemory, TypeStatic)");

	/* mmio window (64-bit) */
	dsdt_line("      QWordMemory (ResourceProducer, PosDecode, "
	    "MinFixed, MaxFixed, NonCacheable, ReadWrite,");
	dsdt_line("        0x0000000000000000, // Granularity");
	dsdt_line("        0x%016lX, // Range Minimum\n", bi->membase64);
	dsdt_line("        0x%016lX, // Range Maximum\n",
	    bi->memlimit64 - 1);
	dsdt_line("        0x0000000000000000, // Translation Offset");
	dsdt_line("        0x%016lX, // Length\n",
	    bi->memlimit64 - bi->membase64);
	dsdt_line("        ,, , AddressRangeMemory, TypeStatic)");
	dsdt_line("    })");

#ifdef __amd64__
	/* _PRT: PIC-mode (PPRT) and APIC-mode (APRT) routing tables. */
	if (pci_count_lintr(bus) != 0) {
		dsdt_indent(2);
		dsdt_line("Name (PPRT, Package ()");
		dsdt_line("{");
		pci_walk_lintr(bus, pci_pirq_prt_entry, NULL);
		dsdt_line("})");
		dsdt_line("Name (APRT, Package ()");
		dsdt_line("{");
		pci_walk_lintr(bus, pci_apic_prt_entry, NULL);
		dsdt_line("})");
		dsdt_line("Method (_PRT, 0, NotSerialized)");
		dsdt_line("{");
		dsdt_line("  If (PICM)");
		dsdt_line("  {");
		dsdt_line("    Return (APRT)");
		dsdt_line("  }");
		dsdt_line("  Else");
		dsdt_line("  {");
		dsdt_line("    Return (PPRT)");
		dsdt_line("  }");
		dsdt_line("}");
		dsdt_unindent(2);
	}
#endif

	/* Let each device emulation contribute its own DSDT nodes. */
	dsdt_indent(2);
	for (slot = 0; slot < MAXSLOTS; slot++) {
		si = &bi->slotinfo[slot];
		for (func = 0; func < MAXFUNCS; func++) {
			pi = si->si_funcs[func].fi_devi;
			if (pi != NULL && pi->pi_d->pe_write_dsdt != NULL)
				pi->pi_d->pe_write_dsdt(pi);
		}
	}
	dsdt_unindent(2);
#ifdef __amd64__
done:
#endif
	dsdt_line("  }");
}
1900
/*
 * Emit the PCI portion of the ACPI DSDT: the PICM interrupt-model flag,
 * the _PIC method that records it, and a device node for every bus.
 */
void
pci_write_dsdt(void)
{
	int bus;

	dsdt_indent(1);
	dsdt_line("Name (PICM, 0x00)");
	dsdt_line("Method (_PIC, 1, NotSerialized)");
	dsdt_line("{");
	dsdt_line("  Store (Arg0, PICM)");
	dsdt_line("}");
	dsdt_line("");
	dsdt_line("Scope (_SB)");
	dsdt_line("{");
	for (bus = 0; bus < MAXBUSES; bus++)
		pci_bus_write_dsdt(bus);
	dsdt_line("}");
	dsdt_unindent(1);
}
1920
/* Return non-zero if any device was configured on 'bus'. */
int
pci_bus_configured(int bus)
{
	assert(bus >= 0 && bus < MAXBUSES);
	return (pci_businfo[bus] != NULL);
}
1927
/* Return non-zero if the guest has enabled MSI on this device. */
int
pci_msi_enabled(struct pci_devinst *pi)
{
	return (pi->pi_msi.enabled);
}
1933
1934 int
pci_msi_maxmsgnum(struct pci_devinst * pi)1935 pci_msi_maxmsgnum(struct pci_devinst *pi)
1936 {
1937 if (pi->pi_msi.enabled)
1938 return (pi->pi_msi.maxmsgnum);
1939 else
1940 return (0);
1941 }
1942
/*
 * Return non-zero if MSI-X delivery is active: MSI-X enabled and MSI not
 * enabled (MSI takes precedence when both are on).
 */
int
pci_msix_enabled(struct pci_devinst *pi)
{

	return (pi->pi_msix.enabled && !pi->pi_msi.enabled);
}
1949
/*
 * Raise MSI-X vector 'index' for the device, honoring the function mask,
 * table bounds, and the per-vector mask bit.  Masked or out-of-range
 * requests are silently dropped.
 */
void
pci_generate_msix(struct pci_devinst *pi, int index)
{
	struct msix_table_entry *mte;

	if (!pci_msix_enabled(pi))
		return;

	if (pi->pi_msix.function_mask)
		return;

	if (index >= pi->pi_msix.table_count)
		return;

	mte = &pi->pi_msix.table[index];
	if ((mte->vector_control & PCIM_MSIX_VCTRL_MASK) == 0) {
		/* XXX Set PBA bit if interrupt is disabled */
		vm_raise_msi(pi->pi_vmctx, mte->addr, mte->msg_data,
		    pi->pi_bus, pi->pi_slot, pi->pi_func);
	}
}
1971
1972 void
pci_generate_msi(struct pci_devinst * pi,int index)1973 pci_generate_msi(struct pci_devinst *pi, int index)
1974 {
1975
1976 if (pci_msi_enabled(pi) && index < pci_msi_maxmsgnum(pi)) {
1977 vm_raise_msi(pi->pi_vmctx, pi->pi_msi.addr,
1978 pi->pi_msi.msg_data + index,
1979 pi->pi_bus, pi->pi_slot, pi->pi_func);
1980 }
1981 }
1982
1983 static bool
pci_lintr_permitted(struct pci_devinst * pi)1984 pci_lintr_permitted(struct pci_devinst *pi)
1985 {
1986 uint16_t cmd;
1987
1988 cmd = pci_get_cfgdata16(pi, PCIR_COMMAND);
1989 return (!(pi->pi_msi.enabled || pi->pi_msix.enabled ||
1990 (cmd & PCIM_CMD_INTxDIS)));
1991 }
1992
/*
 * Reserve an INTx pin for the device, choosing the least-used pin within
 * its slot to balance sharing.  The pin is recorded in pi_lintr and in
 * the config-space INTPIN register (1-based: 1 = INTA#); the IRQ itself
 * is assigned later by pci_lintr_route().
 */
void
pci_lintr_request(struct pci_devinst *pi)
{
	struct businfo *bi;
	struct slotinfo *si;
	int bestpin, bestcount, pin;

	bi = pci_businfo[pi->pi_bus];
	assert(bi != NULL);

	/*
	 * Just allocate a pin from our slot. The pin will be
	 * assigned IRQs later when interrupts are routed.
	 */
	si = &bi->slotinfo[pi->pi_slot];
	bestpin = 0;
	bestcount = si->si_intpins[0].ii_count;
	for (pin = 1; pin < 4; pin++) {
		if (si->si_intpins[pin].ii_count < bestcount) {
			bestpin = pin;
			bestcount = si->si_intpins[pin].ii_count;
		}
	}

	si->si_intpins[bestpin].ii_count++;
	pi->pi_lintr.pin = bestpin + 1;
	pci_set_cfgdata8(pi, PCIR_INTPIN, bestpin + 1);
}
2021
/*
 * Assign an IRQ to the device's previously requested INTx pin and record
 * it in pi_lintr and the config-space INTLINE register.  No-op for
 * devices that never requested a pin.
 */
static void
pci_lintr_route(struct pci_devinst *pi)
{
	struct businfo *bi;
	struct intxinfo *ii;
	struct pci_irq *irq;

	if (pi->pi_lintr.pin == 0)
		return;

	bi = pci_businfo[pi->pi_bus];
	assert(bi != NULL);
	/* pi_lintr.pin is 1-based; the slot's pin array is 0-based. */
	ii = &bi->slotinfo[pi->pi_slot].si_intpins[pi->pi_lintr.pin - 1];
	irq = &ii->ii_irq;
	pci_irq_route(pi, irq);
	pi->pi_lintr.irq = *irq;
	pci_set_cfgdata8(pi, PCIR_INTLINE, pci_irq_intline(irq));
}
2040
/*
 * Assert the device's INTx line.  If INTx delivery is currently not
 * permitted (MSI/MSI-X enabled or INTx disabled), the assertion is
 * recorded as PENDING and delivered later by pci_lintr_update().
 */
void
pci_lintr_assert(struct pci_devinst *pi)
{

	assert(pi->pi_lintr.pin > 0);

	pthread_mutex_lock(&pi->pi_lintr.lock);
	if (pi->pi_lintr.state == IDLE) {
		if (pci_lintr_permitted(pi)) {
			pi->pi_lintr.state = ASSERTED;
			pci_irq_assert(pi);
		} else
			pi->pi_lintr.state = PENDING;
	}
	pthread_mutex_unlock(&pi->pi_lintr.lock);
}
2057
2058 void
pci_lintr_deassert(struct pci_devinst * pi)2059 pci_lintr_deassert(struct pci_devinst *pi)
2060 {
2061
2062 assert(pi->pi_lintr.pin > 0);
2063
2064 pthread_mutex_lock(&pi->pi_lintr.lock);
2065 if (pi->pi_lintr.state == ASSERTED) {
2066 pi->pi_lintr.state = IDLE;
2067 pci_irq_deassert(pi);
2068 } else if (pi->pi_lintr.state == PENDING)
2069 pi->pi_lintr.state = IDLE;
2070 pthread_mutex_unlock(&pi->pi_lintr.lock);
2071 }
2072
/*
 * Reconcile the INTx line with the current delivery policy after a change
 * to MSI/MSI-X enable state or the command register: suppress an asserted
 * line that is no longer permitted, or deliver a pending assertion that
 * has become permitted.
 */
static void
pci_lintr_update(struct pci_devinst *pi)
{

	pthread_mutex_lock(&pi->pi_lintr.lock);
	if (pi->pi_lintr.state == ASSERTED && !pci_lintr_permitted(pi)) {
		pci_irq_deassert(pi);
		pi->pi_lintr.state = PENDING;
	} else if (pi->pi_lintr.state == PENDING && pci_lintr_permitted(pi)) {
		pi->pi_lintr.state = ASSERTED;
		pci_irq_assert(pi);
	}
	pthread_mutex_unlock(&pi->pi_lintr.lock);
}
2087
2088 int
pci_count_lintr(int bus)2089 pci_count_lintr(int bus)
2090 {
2091 int count, slot, pin;
2092 struct slotinfo *slotinfo;
2093
2094 count = 0;
2095 if (pci_businfo[bus] != NULL) {
2096 for (slot = 0; slot < MAXSLOTS; slot++) {
2097 slotinfo = &pci_businfo[bus]->slotinfo[slot];
2098 for (pin = 0; pin < 4; pin++) {
2099 if (slotinfo->si_intpins[pin].ii_count != 0)
2100 count++;
2101 }
2102 }
2103 }
2104 return (count);
2105 }
2106
2107 void
pci_walk_lintr(int bus,pci_lintr_cb cb,void * arg)2108 pci_walk_lintr(int bus, pci_lintr_cb cb, void *arg)
2109 {
2110 struct businfo *bi;
2111 struct slotinfo *si;
2112 struct intxinfo *ii;
2113 int slot, pin;
2114
2115 if ((bi = pci_businfo[bus]) == NULL)
2116 return;
2117
2118 for (slot = 0; slot < MAXSLOTS; slot++) {
2119 si = &bi->slotinfo[slot];
2120 for (pin = 0; pin < 4; pin++) {
2121 ii = &si->si_intpins[pin];
2122 if (ii->ii_count != 0)
2123 cb(bus, slot, pin + 1, &ii->ii_irq, arg);
2124 }
2125 }
2126 }
2127
2128 /*
2129 * Return 1 if the emulated device in 'slot' is a multi-function device.
2130 * Return 0 otherwise.
2131 */
2132 static int
pci_emul_is_mfdev(int bus,int slot)2133 pci_emul_is_mfdev(int bus, int slot)
2134 {
2135 struct businfo *bi;
2136 struct slotinfo *si;
2137 int f, numfuncs;
2138
2139 numfuncs = 0;
2140 if ((bi = pci_businfo[bus]) != NULL) {
2141 si = &bi->slotinfo[slot];
2142 for (f = 0; f < MAXFUNCS; f++) {
2143 if (si->si_funcs[f].fi_devi != NULL) {
2144 numfuncs++;
2145 }
2146 }
2147 }
2148 return (numfuncs > 1);
2149 }
2150
2151 /*
2152 * Ensure that the PCIM_MFDEV bit is properly set (or unset) depending on
2153 * whether or not is a multi-function being emulated in the pci 'slot'.
2154 */
2155 static void
pci_emul_hdrtype_fixup(int bus,int slot,int off,int bytes,uint32_t * rv)2156 pci_emul_hdrtype_fixup(int bus, int slot, int off, int bytes, uint32_t *rv)
2157 {
2158 int mfdev;
2159
2160 if (off <= PCIR_HDRTYPE && off + bytes > PCIR_HDRTYPE) {
2161 mfdev = pci_emul_is_mfdev(bus, slot);
2162 switch (bytes) {
2163 case 1:
2164 case 2:
2165 *rv &= ~PCIM_MFDEV;
2166 if (mfdev) {
2167 *rv |= PCIM_MFDEV;
2168 }
2169 break;
2170 case 4:
2171 *rv &= ~(PCIM_MFDEV << 16);
2172 if (mfdev) {
2173 *rv |= (PCIM_MFDEV << 16);
2174 }
2175 break;
2176 }
2177 }
2178 }
2179
/*
 * Update device state in response to changes to the PCI command
 * register.
 *
 * 'old' is the command register value before the write; the new value is
 * read back from config space.  Toggling the I/O or memory decode bits
 * registers/unregisters the corresponding BARs, and INTx state is
 * re-evaluated in case the interrupt-disable bit changed.
 */
void
pci_emul_cmd_changed(struct pci_devinst *pi, uint16_t old)
{
	int i;
	uint16_t changed, new;

	new = pci_get_cfgdata16(pi, PCIR_COMMAND);
	changed = old ^ new;

	/*
	 * If the MMIO or I/O address space decoding has changed then
	 * register/unregister all BARs that decode that address space.
	 */
	for (i = 0; i <= PCI_BARMAX_WITH_ROM; i++) {
		switch (pi->pi_bar[i].type) {
		case PCIBAR_NONE:
		case PCIBAR_MEMHI64:
			/* MEMHI64 is the upper half of a MEM64 BAR. */
			break;
		case PCIBAR_IO:
			/* I/O address space decoding changed? */
			if (changed & PCIM_CMD_PORTEN) {
				if (new & PCIM_CMD_PORTEN)
					register_bar(pi, i);
				else
					unregister_bar(pi, i);
			}
			break;
		case PCIBAR_ROM:
			/* skip (un-)register of ROM if it disabled */
			if (!romen(pi))
				break;
			/* fallthrough */
		case PCIBAR_MEM32:
		case PCIBAR_MEM64:
			/* MMIO address space decoding changed? */
			if (changed & PCIM_CMD_MEMEN) {
				if (new & PCIM_CMD_MEMEN)
					register_bar(pi, i);
				else
					unregister_bar(pi, i);
			}
			break;
		default:
			assert(0);
		}
	}

	/*
	 * If INTx has been unmasked and is pending, assert the
	 * interrupt.
	 */
	pci_lintr_update(pi);
}
2237
static void
pci_emul_cmdsts_write(struct pci_devinst *pi, int coff, uint32_t new, int bytes)
{
	int rshift;
	uint32_t cmd, old, readonly;

	cmd = pci_get_cfgdata16(pi, PCIR_COMMAND);	/* stash old value */

	/*
	 * From PCI Local Bus Specification 3.0 sections 6.2.2 and 6.2.3.
	 *
	 * XXX Bits 8, 11, 12, 13, 14 and 15 in the status register are
	 * 'write 1 to clear'. However these bits are not set to '1' by
	 * any device emulation so it is simpler to treat them as readonly.
	 */
	/*
	 * 0xFFFFF880 marks the read-only bits of the combined 32-bit
	 * command (low 16) + status (high 16) pair.  The access may land
	 * on any byte of the pair, so shift the mask down to align it
	 * with the accessed offset.
	 */
	rshift = (coff & 0x3) * 8;
	readonly = 0xFFFFF880 >> rshift;

	/* Merge the writable bits of 'new' with the preserved bits. */
	old = CFGREAD(pi, coff, bytes);
	new &= ~readonly;
	new |= (old & readonly);
	CFGWRITE(pi, coff, new, bytes);		/* update config */

	/* React to command bits that changed (BAR decode enables, INTx). */
	pci_emul_cmd_changed(pi, cmd);
}
2263
/*
 * Central handler for guest PCI config space accesses.  For reads the
 * result is stored in *valp; for writes *valp supplies the value.
 * Applies generic handling for BAR/ROM programming, capability writes
 * and the command/status registers, after giving the device model's
 * cfgread/cfgwrite hooks a chance to override.
 */
static void
pci_cfgrw(int in, int bus, int slot, int func, int coff, int bytes,
    uint32_t *valp)
{
	struct businfo *bi;
	struct slotinfo *si;
	struct pci_devinst *pi;
	struct pci_devemu *pe;
	int idx, needcfg;
	uint64_t addr, bar, mask;

	/* Look up the device instance at bus/slot/func, if any. */
	if ((bi = pci_businfo[bus]) != NULL) {
		si = &bi->slotinfo[slot];
		pi = si->si_funcs[func].fi_devi;
	} else
		pi = NULL;

	/*
	 * Just return if there is no device at this slot:func or if the
	 * guest is doing an un-aligned access.
	 */
	if (pi == NULL || (bytes != 1 && bytes != 2 && bytes != 4) ||
	    (coff & (bytes - 1)) != 0) {
		if (in)
			*valp = 0xffffffff;
		return;
	}

	/*
	 * Ignore all writes beyond the standard config space and return all
	 * ones on reads.
	 */
	if (coff >= PCI_REGMAX + 1) {
		if (in) {
			*valp = 0xffffffff;
			/*
			 * Extended capabilities begin at offset 256 in config
			 * space. Absence of extended capabilities is signaled
			 * with all 0s in the extended capability header at
			 * offset 256.
			 */
			if (coff <= PCI_REGMAX + 4)
				*valp = 0x00000000;
		}
		return;
	}

	pe = pi->pi_d;

	/*
	 * Config read
	 */
	if (in) {
		/* Let the device emulation override the default handler */
		if (pe->pe_cfgread != NULL) {
			needcfg = pe->pe_cfgread(pi, coff, bytes, valp);
		} else {
			needcfg = 1;
		}

		/* Hook declined (or absent): read emulated config space. */
		if (needcfg)
			*valp = CFGREAD(pi, coff, bytes);

		/* Keep the multi-function bit in HDRTYPE consistent. */
		pci_emul_hdrtype_fixup(bus, slot, coff, bytes, valp);
	} else {
		/* Let the device emulation override the default handler */
		if (pe->pe_cfgwrite != NULL &&
		    (*pe->pe_cfgwrite)(pi, coff, bytes, *valp) == 0)
			return;

		/*
		 * Special handling for write to BAR and ROM registers
		 */
		if (is_pcir_bar(coff) || is_pcir_bios(coff)) {
			/*
			 * Ignore writes to BAR registers that are not
			 * 4-byte aligned.
			 */
			if (bytes != 4 || (coff & 0x3) != 0)
				return;

			if (is_pcir_bar(coff)) {
				idx = (coff - PCIR_BAR(0)) / 4;
			} else if (is_pcir_bios(coff)) {
				idx = PCI_ROM_IDX;
			} else {
				errx(4, "%s: invalid BAR offset %d", __func__,
				    coff);
			}

			/* Alignment mask derived from the BAR's size. */
			mask = ~(pi->pi_bar[idx].size - 1);
			switch (pi->pi_bar[idx].type) {
			case PCIBAR_NONE:
				pi->pi_bar[idx].addr = bar = 0;
				break;
			case PCIBAR_IO:
				addr = *valp & mask;
#if defined(PCI_EMUL_IOMASK)
				addr &= PCI_EMUL_IOMASK;
#endif
				bar = addr | pi->pi_bar[idx].lobits;
				/*
				 * Register the new BAR value for interception
				 */
				if (addr != pi->pi_bar[idx].addr) {
					update_bar_address(pi, addr, idx,
					    PCIBAR_IO);
				}
				break;
			case PCIBAR_MEM32:
				addr = bar = *valp & mask;
				bar |= pi->pi_bar[idx].lobits;
				if (addr != pi->pi_bar[idx].addr) {
					update_bar_address(pi, addr, idx,
					    PCIBAR_MEM32);
				}
				break;
			case PCIBAR_MEM64:
				/* Low half of a 64-bit BAR. */
				addr = bar = *valp & mask;
				bar |= pi->pi_bar[idx].lobits;
				if (addr != (uint32_t)pi->pi_bar[idx].addr) {
					update_bar_address(pi, addr, idx,
					    PCIBAR_MEM64);
				}
				break;
			case PCIBAR_MEMHI64:
				/* High half: mask comes from the low half. */
				mask = ~(pi->pi_bar[idx - 1].size - 1);
				addr = ((uint64_t)*valp << 32) & mask;
				bar = addr >> 32;
				if (bar != pi->pi_bar[idx - 1].addr >> 32) {
					update_bar_address(pi, addr, idx - 1,
					    PCIBAR_MEMHI64);
				}
				break;
			case PCIBAR_ROM:
				addr = bar = *valp & mask;
				/*
				 * Unregister while the address/enable bit is
				 * updated, then re-register if still enabled.
				 */
				if (memen(pi) && romen(pi)) {
					unregister_bar(pi, idx);
				}
				pi->pi_bar[idx].addr = addr;
				pi->pi_bar[idx].lobits = *valp &
				    PCIM_BIOS_ENABLE;
				/* romen could have changed it value */
				if (memen(pi) && romen(pi)) {
					register_bar(pi, idx);
				}
				bar |= pi->pi_bar[idx].lobits;
				break;
			default:
				assert(0);
			}
			/* Reflect the programmed value in config space. */
			pci_set_cfgdata32(pi, coff, bar);

		} else if (pci_emul_iscap(pi, coff)) {
			pci_emul_capwrite(pi, coff, bytes, *valp, 0, 0);
		} else if (coff >= PCIR_COMMAND && coff < PCIR_REVID) {
			pci_emul_cmdsts_write(pi, coff, *valp, bytes);
		} else {
			CFGWRITE(pi, coff, *valp, bytes);
		}
	}
}
2426
2427 #ifdef __amd64__
2428 static int cfgenable, cfgbus, cfgslot, cfgfunc, cfgoff;
2429
2430 static int
pci_emul_cfgaddr(struct vmctx * ctx __unused,int in,int port __unused,int bytes,uint32_t * eax,void * arg __unused)2431 pci_emul_cfgaddr(struct vmctx *ctx __unused, int in,
2432 int port __unused, int bytes, uint32_t *eax, void *arg __unused)
2433 {
2434 uint32_t x;
2435
2436 if (bytes != 4) {
2437 if (in)
2438 *eax = (bytes == 2) ? 0xffff : 0xff;
2439 return (0);
2440 }
2441
2442 if (in) {
2443 x = (cfgbus << 16) | (cfgslot << 11) | (cfgfunc << 8) | cfgoff;
2444 if (cfgenable)
2445 x |= CONF1_ENABLE;
2446 *eax = x;
2447 } else {
2448 x = *eax;
2449 cfgenable = (x & CONF1_ENABLE) == CONF1_ENABLE;
2450 cfgoff = (x & PCI_REGMAX) & ~0x03;
2451 cfgfunc = (x >> 8) & PCI_FUNCMAX;
2452 cfgslot = (x >> 11) & PCI_SLOTMAX;
2453 cfgbus = (x >> 16) & PCI_BUSMAX;
2454 }
2455
2456 return (0);
2457 }
2458 INOUT_PORT(pci_cfgaddr, CONF1_ADDR_PORT, IOPORT_F_INOUT, pci_emul_cfgaddr);
2459
2460 static int
pci_emul_cfgdata(struct vmctx * ctx __unused,int in,int port,int bytes,uint32_t * eax,void * arg __unused)2461 pci_emul_cfgdata(struct vmctx *ctx __unused, int in, int port,
2462 int bytes, uint32_t *eax, void *arg __unused)
2463 {
2464 int coff;
2465
2466 assert(bytes == 1 || bytes == 2 || bytes == 4);
2467
2468 coff = cfgoff + (port - CONF1_DATA_PORT);
2469 if (cfgenable) {
2470 pci_cfgrw(in, cfgbus, cfgslot, cfgfunc, coff, bytes, eax);
2471 } else {
2472 /* Ignore accesses to cfgdata if not enabled by cfgaddr */
2473 if (in)
2474 *eax = 0xffffffff;
2475 }
2476 return (0);
2477 }
2478
2479 INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+0, IOPORT_F_INOUT, pci_emul_cfgdata);
2480 INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+1, IOPORT_F_INOUT, pci_emul_cfgdata);
2481 INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+2, IOPORT_F_INOUT, pci_emul_cfgdata);
2482 INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+3, IOPORT_F_INOUT, pci_emul_cfgdata);
2483 #endif
2484
2485 #ifdef BHYVE_SNAPSHOT
2486 /*
2487 * Saves/restores PCI device emulated state. Returns 0 on success.
2488 */
static int
pci_snapshot_pci_dev(struct vm_snapshot_meta *meta)
{
	struct pci_devinst *pi;
	int i;
	int ret;

	pi = meta->dev_data;

	/*
	 * Each SNAPSHOT_*_OR_LEAVE macro assigns 'ret' and jumps to
	 * 'done' on failure, so 'ret' is set on every path that reaches
	 * the label.
	 */

	/* MSI state. */
	SNAPSHOT_VAR_OR_LEAVE(pi->pi_msi.enabled, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(pi->pi_msi.addr, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(pi->pi_msi.msg_data, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(pi->pi_msi.maxmsgnum, meta, ret, done);

	/* MSI-X configuration. */
	SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.enabled, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table_bar, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.pba_bar, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table_offset, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table_count, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.pba_offset, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.pba_size, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.function_mask, meta, ret, done);

	/* Raw config space contents. */
	SNAPSHOT_BUF_OR_LEAVE(pi->pi_cfgdata, sizeof(pi->pi_cfgdata),
	    meta, ret, done);

	/* BAR layout: type, size and programmed address for each BAR. */
	for (i = 0; i < (int)nitems(pi->pi_bar); i++) {
		SNAPSHOT_VAR_OR_LEAVE(pi->pi_bar[i].type, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(pi->pi_bar[i].size, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(pi->pi_bar[i].addr, meta, ret, done);
	}

	/* Restore MSI-X table. */
	for (i = 0; i < pi->pi_msix.table_count; i++) {
		SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table[i].addr,
		    meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table[i].msg_data,
		    meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table[i].vector_control,
		    meta, ret, done);
	}

done:
	return (ret);
}
2534
2535 int
pci_snapshot(struct vm_snapshot_meta * meta)2536 pci_snapshot(struct vm_snapshot_meta *meta)
2537 {
2538 struct pci_devemu *pde;
2539 struct pci_devinst *pdi;
2540 int ret;
2541
2542 assert(meta->dev_name != NULL);
2543
2544 pdi = meta->dev_data;
2545 pde = pdi->pi_d;
2546
2547 if (pde->pe_snapshot == NULL)
2548 return (ENOTSUP);
2549
2550 ret = pci_snapshot_pci_dev(meta);
2551 if (ret == 0)
2552 ret = (*pde->pe_snapshot)(meta);
2553
2554 return (ret);
2555 }
2556
2557 int
pci_pause(struct pci_devinst * pdi)2558 pci_pause(struct pci_devinst *pdi)
2559 {
2560 struct pci_devemu *pde = pdi->pi_d;
2561
2562 if (pde->pe_pause == NULL) {
2563 /* The pause/resume functionality is optional. */
2564 return (0);
2565 }
2566
2567 return (*pde->pe_pause)(pdi);
2568 }
2569
2570 int
pci_resume(struct pci_devinst * pdi)2571 pci_resume(struct pci_devinst *pdi)
2572 {
2573 struct pci_devemu *pde = pdi->pi_d;
2574
2575 if (pde->pe_resume == NULL) {
2576 /* The pause/resume functionality is optional. */
2577 return (0);
2578 }
2579
2580 return (*pde->pe_resume)(pdi);
2581 }
2582 #endif
2583
#define PCI_EMUL_TEST
#ifdef PCI_EMUL_TEST
/*
 * Define a dummy test device
 */
#define DIOSZ	8	/* size of the I/O BAR (bytes) */
#define DMEMSZ	4096	/* size of each memory BAR (bytes) */
struct pci_emul_dsoftc {
	uint8_t   ioregs[DIOSZ];	/* backing store for BAR 0 (I/O) */
	uint8_t   memregs[2][DMEMSZ];	/* backing store for BARs 1 and 2 */
};

#define	PCI_EMUL_MSI_MSGS	 4	/* MSI message count for the test device */
#define	PCI_EMUL_MSIX_MSGS	16	/* MSI-X message count */
2598
2599 static int
pci_emul_dinit(struct pci_devinst * pi,nvlist_t * nvl __unused)2600 pci_emul_dinit(struct pci_devinst *pi, nvlist_t *nvl __unused)
2601 {
2602 int error;
2603 struct pci_emul_dsoftc *sc;
2604
2605 sc = calloc(1, sizeof(struct pci_emul_dsoftc));
2606
2607 pi->pi_arg = sc;
2608
2609 pci_set_cfgdata16(pi, PCIR_DEVICE, 0x0001);
2610 pci_set_cfgdata16(pi, PCIR_VENDOR, 0x10DD);
2611 pci_set_cfgdata8(pi, PCIR_CLASS, 0x02);
2612
2613 error = pci_emul_add_msicap(pi, PCI_EMUL_MSI_MSGS);
2614 assert(error == 0);
2615
2616 error = pci_emul_alloc_bar(pi, 0, PCIBAR_IO, DIOSZ);
2617 assert(error == 0);
2618
2619 error = pci_emul_alloc_bar(pi, 1, PCIBAR_MEM32, DMEMSZ);
2620 assert(error == 0);
2621
2622 error = pci_emul_alloc_bar(pi, 2, PCIBAR_MEM32, DMEMSZ);
2623 assert(error == 0);
2624
2625 return (0);
2626 }
2627
2628 static void
pci_emul_diow(struct pci_devinst * pi,int baridx,uint64_t offset,int size,uint64_t value)2629 pci_emul_diow(struct pci_devinst *pi, int baridx, uint64_t offset, int size,
2630 uint64_t value)
2631 {
2632 int i;
2633 struct pci_emul_dsoftc *sc = pi->pi_arg;
2634
2635 if (baridx == 0) {
2636 if (offset + size > DIOSZ) {
2637 printf("diow: iow too large, offset %ld size %d\n",
2638 offset, size);
2639 return;
2640 }
2641
2642 if (size == 1) {
2643 sc->ioregs[offset] = value & 0xff;
2644 } else if (size == 2) {
2645 *(uint16_t *)&sc->ioregs[offset] = value & 0xffff;
2646 } else if (size == 4) {
2647 *(uint32_t *)&sc->ioregs[offset] = value;
2648 } else {
2649 printf("diow: iow unknown size %d\n", size);
2650 }
2651
2652 /*
2653 * Special magic value to generate an interrupt
2654 */
2655 if (offset == 4 && size == 4 && pci_msi_enabled(pi))
2656 pci_generate_msi(pi, value % pci_msi_maxmsgnum(pi));
2657
2658 if (value == 0xabcdef) {
2659 for (i = 0; i < pci_msi_maxmsgnum(pi); i++)
2660 pci_generate_msi(pi, i);
2661 }
2662 }
2663
2664 if (baridx == 1 || baridx == 2) {
2665 if (offset + size > DMEMSZ) {
2666 printf("diow: memw too large, offset %ld size %d\n",
2667 offset, size);
2668 return;
2669 }
2670
2671 i = baridx - 1; /* 'memregs' index */
2672
2673 if (size == 1) {
2674 sc->memregs[i][offset] = value;
2675 } else if (size == 2) {
2676 *(uint16_t *)&sc->memregs[i][offset] = value;
2677 } else if (size == 4) {
2678 *(uint32_t *)&sc->memregs[i][offset] = value;
2679 } else if (size == 8) {
2680 *(uint64_t *)&sc->memregs[i][offset] = value;
2681 } else {
2682 printf("diow: memw unknown size %d\n", size);
2683 }
2684
2685 /*
2686 * magic interrupt ??
2687 */
2688 }
2689
2690 if (baridx > 2 || baridx < 0) {
2691 printf("diow: unknown bar idx %d\n", baridx);
2692 }
2693 }
2694
2695 static uint64_t
pci_emul_dior(struct pci_devinst * pi,int baridx,uint64_t offset,int size)2696 pci_emul_dior(struct pci_devinst *pi, int baridx, uint64_t offset, int size)
2697 {
2698 struct pci_emul_dsoftc *sc = pi->pi_arg;
2699 uint32_t value;
2700 int i;
2701
2702 if (baridx == 0) {
2703 if (offset + size > DIOSZ) {
2704 printf("dior: ior too large, offset %ld size %d\n",
2705 offset, size);
2706 return (0);
2707 }
2708
2709 value = 0;
2710 if (size == 1) {
2711 value = sc->ioregs[offset];
2712 } else if (size == 2) {
2713 value = *(uint16_t *) &sc->ioregs[offset];
2714 } else if (size == 4) {
2715 value = *(uint32_t *) &sc->ioregs[offset];
2716 } else {
2717 printf("dior: ior unknown size %d\n", size);
2718 }
2719 }
2720
2721 if (baridx == 1 || baridx == 2) {
2722 if (offset + size > DMEMSZ) {
2723 printf("dior: memr too large, offset %ld size %d\n",
2724 offset, size);
2725 return (0);
2726 }
2727
2728 i = baridx - 1; /* 'memregs' index */
2729
2730 if (size == 1) {
2731 value = sc->memregs[i][offset];
2732 } else if (size == 2) {
2733 value = *(uint16_t *) &sc->memregs[i][offset];
2734 } else if (size == 4) {
2735 value = *(uint32_t *) &sc->memregs[i][offset];
2736 } else if (size == 8) {
2737 value = *(uint64_t *) &sc->memregs[i][offset];
2738 } else {
2739 printf("dior: ior unknown size %d\n", size);
2740 }
2741 }
2742
2743
2744 if (baridx > 2 || baridx < 0) {
2745 printf("dior: unknown bar idx %d\n", baridx);
2746 return (0);
2747 }
2748
2749 return (value);
2750 }
2751
2752 #ifdef BHYVE_SNAPSHOT
struct pci_devinst *
pci_next(const struct pci_devinst *cursor)
{
	unsigned bus = 0, slot = 0, func = 0;
	struct businfo *bi;
	struct slotinfo *si;
	struct funcinfo *fi;

	/*
	 * Iterate over all emulated PCI functions in bus/slot/func order.
	 * Resume the walk just after 'cursor'; a NULL cursor starts from
	 * bus 0, slot 0, function 0.
	 */
	bus = cursor ? cursor->pi_bus : 0;
	slot = cursor ? cursor->pi_slot : 0;
	func = cursor ? (cursor->pi_func + 1) : 0;

	for (; bus < MAXBUSES; bus++) {
		if ((bi = pci_businfo[bus]) == NULL)
			continue;

		/*
		 * The slot/func counters carry over from the cursor only
		 * until they run past their limits; after that they wrap
		 * to 0 for subsequent slots/buses.
		 */
		if (slot >= MAXSLOTS)
			slot = 0;

		for (; slot < MAXSLOTS; slot++) {
			si = &bi->slotinfo[slot];
			if (func >= MAXFUNCS)
				func = 0;
			for (; func < MAXFUNCS; func++) {
				fi = &si->si_funcs[func];
				if (fi->fi_devi == NULL)
					continue;

				return (fi->fi_devi);
			}
		}
	}

	/* Walk exhausted: no more devices after 'cursor'. */
	return (NULL);
}
2788
static int
pci_emul_snapshot(struct vm_snapshot_meta *meta __unused)
{
	/*
	 * No device-specific snapshot work for the dummy test device.
	 * NOTE(review): the softc (ioregs/memregs) is not saved here, so
	 * its contents are not preserved across snapshot/restore.
	 */
	return (0);
}
2794 #endif
2795
/* Emulation callbacks for the dummy test device. */
static const struct pci_devemu pci_dummy = {
	.pe_emu = "dummy",
	.pe_init = pci_emul_dinit,
	.pe_barwrite = pci_emul_diow,
	.pe_barread = pci_emul_dior,
#ifdef BHYVE_SNAPSHOT
	.pe_snapshot = pci_emul_snapshot,
#endif
};
PCI_EMUL_SET(pci_dummy);
2806
2807 #endif /* PCI_EMUL_TEST */
2808