xref: /qemu/hw/pci/msix.c (revision 191f90cbea08122b48107f1f9116106fbf3bdfac)
102eb84d0SMichael S. Tsirkin /*
202eb84d0SMichael S. Tsirkin  * MSI-X device support
302eb84d0SMichael S. Tsirkin  *
402eb84d0SMichael S. Tsirkin  * This module includes support for MSI-X in pci devices.
502eb84d0SMichael S. Tsirkin  *
602eb84d0SMichael S. Tsirkin  * Author: Michael S. Tsirkin <mst@redhat.com>
702eb84d0SMichael S. Tsirkin  *
802eb84d0SMichael S. Tsirkin  *  Copyright (c) 2009, Red Hat Inc, Michael S. Tsirkin (mst@redhat.com)
902eb84d0SMichael S. Tsirkin  *
1002eb84d0SMichael S. Tsirkin  * This work is licensed under the terms of the GNU GPL, version 2.  See
1102eb84d0SMichael S. Tsirkin  * the COPYING file in the top-level directory.
126b620ca3SPaolo Bonzini  *
136b620ca3SPaolo Bonzini  * Contributions after 2012-01-13 are licensed under the terms of the
146b620ca3SPaolo Bonzini  * GNU GPL, version 2 or (at your option) any later version.
1502eb84d0SMichael S. Tsirkin  */
1602eb84d0SMichael S. Tsirkin 
1797d5408fSPeter Maydell #include "qemu/osdep.h"
18c759b24fSMichael S. Tsirkin #include "hw/pci/msi.h"
19c759b24fSMichael S. Tsirkin #include "hw/pci/msix.h"
20c759b24fSMichael S. Tsirkin #include "hw/pci/pci.h"
21428c3eceSStefano Stabellini #include "hw/xen/xen.h"
22ca77ee28SMarkus Armbruster #include "migration/qemu-file-types.h"
23d6454270SMarkus Armbruster #include "migration/vmstate.h"
241de7afc9SPaolo Bonzini #include "qemu/range.h"
25ee640c62SCao jin #include "qapi/error.h"
26993b1f4bSPeter Xu #include "trace.h"
2702eb84d0SMichael S. Tsirkin 
/*
 * The MSI-X enable bit and the maskall (function mask) bit are in byte 1 of
 * the FLAGS register, so that byte is addressed directly and the flag masks
 * are shifted down by 8 bits to match.
 */
#define MSIX_CONTROL_OFFSET (PCI_MSIX_FLAGS + 1)
#define MSIX_ENABLE_MASK (PCI_MSIX_FLAGS_ENABLE >> 8)
#define MSIX_MASKALL_MASK (PCI_MSIX_FLAGS_MASKALL >> 8)
3202eb84d0SMichael S. Tsirkin 
334c93bfa9SMichael S. Tsirkin MSIMessage msix_get_message(PCIDevice *dev, unsigned vector)
34bc4caf49SJan Kiszka {
35d35e428cSAlex Williamson     uint8_t *table_entry = dev->msix_table + vector * PCI_MSIX_ENTRY_SIZE;
36bc4caf49SJan Kiszka     MSIMessage msg;
37bc4caf49SJan Kiszka 
38bc4caf49SJan Kiszka     msg.address = pci_get_quad(table_entry + PCI_MSIX_ENTRY_LOWER_ADDR);
39bc4caf49SJan Kiszka     msg.data = pci_get_long(table_entry + PCI_MSIX_ENTRY_DATA);
40bc4caf49SJan Kiszka     return msg;
41bc4caf49SJan Kiszka }
4202eb84d0SMichael S. Tsirkin 
43932d4a42SAlexey Kardashevskiy /*
44932d4a42SAlexey Kardashevskiy  * Special API for POWER to configure the vectors through
45932d4a42SAlexey Kardashevskiy  * a side channel. Should never be used by devices.
46932d4a42SAlexey Kardashevskiy  */
47932d4a42SAlexey Kardashevskiy void msix_set_message(PCIDevice *dev, int vector, struct MSIMessage msg)
48932d4a42SAlexey Kardashevskiy {
49932d4a42SAlexey Kardashevskiy     uint8_t *table_entry = dev->msix_table + vector * PCI_MSIX_ENTRY_SIZE;
50932d4a42SAlexey Kardashevskiy 
51932d4a42SAlexey Kardashevskiy     pci_set_quad(table_entry + PCI_MSIX_ENTRY_LOWER_ADDR, msg.address);
52932d4a42SAlexey Kardashevskiy     pci_set_long(table_entry + PCI_MSIX_ENTRY_DATA, msg.data);
53932d4a42SAlexey Kardashevskiy     table_entry[PCI_MSIX_ENTRY_VECTOR_CTRL] &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT;
54932d4a42SAlexey Kardashevskiy }
55932d4a42SAlexey Kardashevskiy 
/* Return the bit mask selecting @vector's pending bit inside its PBA byte. */
static uint8_t msix_pending_mask(int vector)
{
    const int bit = vector % 8;

    return 1 << bit;
}
6002eb84d0SMichael S. Tsirkin 
6102eb84d0SMichael S. Tsirkin static uint8_t *msix_pending_byte(PCIDevice *dev, int vector)
6202eb84d0SMichael S. Tsirkin {
63d35e428cSAlex Williamson     return dev->msix_pba + vector / 8;
6402eb84d0SMichael S. Tsirkin }
6502eb84d0SMichael S. Tsirkin 
6602eb84d0SMichael S. Tsirkin static int msix_is_pending(PCIDevice *dev, int vector)
6702eb84d0SMichael S. Tsirkin {
6802eb84d0SMichael S. Tsirkin     return *msix_pending_byte(dev, vector) & msix_pending_mask(vector);
6902eb84d0SMichael S. Tsirkin }
7002eb84d0SMichael S. Tsirkin 
7170f8ee39SMichael S. Tsirkin void msix_set_pending(PCIDevice *dev, unsigned int vector)
7202eb84d0SMichael S. Tsirkin {
7302eb84d0SMichael S. Tsirkin     *msix_pending_byte(dev, vector) |= msix_pending_mask(vector);
7402eb84d0SMichael S. Tsirkin }
7502eb84d0SMichael S. Tsirkin 
763bdfaabbSDmitry Fleytman void msix_clr_pending(PCIDevice *dev, int vector)
7702eb84d0SMichael S. Tsirkin {
7802eb84d0SMichael S. Tsirkin     *msix_pending_byte(dev, vector) &= ~msix_pending_mask(vector);
7902eb84d0SMichael S. Tsirkin }
8002eb84d0SMichael S. Tsirkin 
8170f8ee39SMichael S. Tsirkin static bool msix_vector_masked(PCIDevice *dev, unsigned int vector, bool fmask)
8202eb84d0SMichael S. Tsirkin {
83428c3eceSStefano Stabellini     unsigned offset = vector * PCI_MSIX_ENTRY_SIZE;
84e1e4bf22SMichael S. Tsirkin     uint8_t *data = &dev->msix_table[offset + PCI_MSIX_ENTRY_DATA];
85428c3eceSStefano Stabellini     /* MSIs on Xen can be remapped into pirqs. In those cases, masking
86428c3eceSStefano Stabellini      * and unmasking go through the PV evtchn path. */
87e1e4bf22SMichael S. Tsirkin     if (xen_enabled() && xen_is_pirq_msi(pci_get_long(data))) {
88428c3eceSStefano Stabellini         return false;
89428c3eceSStefano Stabellini     }
90428c3eceSStefano Stabellini     return fmask || dev->msix_table[offset + PCI_MSIX_ENTRY_VECTOR_CTRL] &
91428c3eceSStefano Stabellini         PCI_MSIX_ENTRY_CTRL_MASKBIT;
925b5cb086SMichael S. Tsirkin }
935b5cb086SMichael S. Tsirkin 
9470f8ee39SMichael S. Tsirkin bool msix_is_masked(PCIDevice *dev, unsigned int vector)
955b5cb086SMichael S. Tsirkin {
96ae392c41SMichael S. Tsirkin     return msix_vector_masked(dev, vector, dev->msix_function_masked);
97ae392c41SMichael S. Tsirkin }
98ae392c41SMichael S. Tsirkin 
992cdfe53cSJan Kiszka static void msix_fire_vector_notifier(PCIDevice *dev,
1002cdfe53cSJan Kiszka                                       unsigned int vector, bool is_masked)
1012cdfe53cSJan Kiszka {
1022cdfe53cSJan Kiszka     MSIMessage msg;
1032cdfe53cSJan Kiszka     int ret;
1042cdfe53cSJan Kiszka 
1052cdfe53cSJan Kiszka     if (!dev->msix_vector_use_notifier) {
1062cdfe53cSJan Kiszka         return;
1072cdfe53cSJan Kiszka     }
1082cdfe53cSJan Kiszka     if (is_masked) {
1092cdfe53cSJan Kiszka         dev->msix_vector_release_notifier(dev, vector);
1102cdfe53cSJan Kiszka     } else {
1112cdfe53cSJan Kiszka         msg = msix_get_message(dev, vector);
1122cdfe53cSJan Kiszka         ret = dev->msix_vector_use_notifier(dev, vector, msg);
1132cdfe53cSJan Kiszka         assert(ret >= 0);
1142cdfe53cSJan Kiszka     }
1152cdfe53cSJan Kiszka }
1162cdfe53cSJan Kiszka 
117ae392c41SMichael S. Tsirkin static void msix_handle_mask_update(PCIDevice *dev, int vector, bool was_masked)
118ae392c41SMichael S. Tsirkin {
119ae392c41SMichael S. Tsirkin     bool is_masked = msix_is_masked(dev, vector);
1202cdfe53cSJan Kiszka 
121ae392c41SMichael S. Tsirkin     if (is_masked == was_masked) {
122ae392c41SMichael S. Tsirkin         return;
123ae392c41SMichael S. Tsirkin     }
124ae392c41SMichael S. Tsirkin 
1252cdfe53cSJan Kiszka     msix_fire_vector_notifier(dev, vector, is_masked);
1262cdfe53cSJan Kiszka 
127ae392c41SMichael S. Tsirkin     if (!is_masked && msix_is_pending(dev, vector)) {
1285b5cb086SMichael S. Tsirkin         msix_clr_pending(dev, vector);
1295b5cb086SMichael S. Tsirkin         msix_notify(dev, vector);
1305b5cb086SMichael S. Tsirkin     }
1315b5cb086SMichael S. Tsirkin }
1325b5cb086SMichael S. Tsirkin 
133993b1f4bSPeter Xu static bool msix_masked(PCIDevice *dev)
134993b1f4bSPeter Xu {
135993b1f4bSPeter Xu     return dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] & MSIX_MASKALL_MASK;
136993b1f4bSPeter Xu }
137993b1f4bSPeter Xu 
13850322249SMichael S. Tsirkin static void msix_update_function_masked(PCIDevice *dev)
13950322249SMichael S. Tsirkin {
140993b1f4bSPeter Xu     dev->msix_function_masked = !msix_enabled(dev) || msix_masked(dev);
14150322249SMichael S. Tsirkin }
14250322249SMichael S. Tsirkin 
1435b5cb086SMichael S. Tsirkin /* Handle MSI-X capability config write. */
1445b5cb086SMichael S. Tsirkin void msix_write_config(PCIDevice *dev, uint32_t addr,
1455b5cb086SMichael S. Tsirkin                        uint32_t val, int len)
1465b5cb086SMichael S. Tsirkin {
1475b5cb086SMichael S. Tsirkin     unsigned enable_pos = dev->msix_cap + MSIX_CONTROL_OFFSET;
1485b5cb086SMichael S. Tsirkin     int vector;
14950322249SMichael S. Tsirkin     bool was_masked;
1505b5cb086SMichael S. Tsirkin 
1517c9958b0SJan Kiszka     if (!msix_present(dev) || !range_covers_byte(addr, len, enable_pos)) {
1525b5cb086SMichael S. Tsirkin         return;
1535b5cb086SMichael S. Tsirkin     }
1545b5cb086SMichael S. Tsirkin 
155993b1f4bSPeter Xu     trace_msix_write_config(dev->name, msix_enabled(dev), msix_masked(dev));
156993b1f4bSPeter Xu 
15750322249SMichael S. Tsirkin     was_masked = dev->msix_function_masked;
15850322249SMichael S. Tsirkin     msix_update_function_masked(dev);
15950322249SMichael S. Tsirkin 
1605b5cb086SMichael S. Tsirkin     if (!msix_enabled(dev)) {
1615b5cb086SMichael S. Tsirkin         return;
1625b5cb086SMichael S. Tsirkin     }
1635b5cb086SMichael S. Tsirkin 
164e407bf13SIsaku Yamahata     pci_device_deassert_intx(dev);
1655b5cb086SMichael S. Tsirkin 
16650322249SMichael S. Tsirkin     if (dev->msix_function_masked == was_masked) {
1675b5cb086SMichael S. Tsirkin         return;
1685b5cb086SMichael S. Tsirkin     }
1695b5cb086SMichael S. Tsirkin 
1705b5cb086SMichael S. Tsirkin     for (vector = 0; vector < dev->msix_entries_nr; ++vector) {
171ae392c41SMichael S. Tsirkin         msix_handle_mask_update(dev, vector,
172ae392c41SMichael S. Tsirkin                                 msix_vector_masked(dev, vector, was_masked));
1735b5cb086SMichael S. Tsirkin     }
17402eb84d0SMichael S. Tsirkin }
17502eb84d0SMichael S. Tsirkin 
176a8170e5eSAvi Kivity static uint64_t msix_table_mmio_read(void *opaque, hwaddr addr,
177eebcb0a7SAlex Williamson                                      unsigned size)
178eebcb0a7SAlex Williamson {
179eebcb0a7SAlex Williamson     PCIDevice *dev = opaque;
180eebcb0a7SAlex Williamson 
181d35e428cSAlex Williamson     return pci_get_long(dev->msix_table + addr);
182eebcb0a7SAlex Williamson }
183eebcb0a7SAlex Williamson 
184a8170e5eSAvi Kivity static void msix_table_mmio_write(void *opaque, hwaddr addr,
18595524ae8SAvi Kivity                                   uint64_t val, unsigned size)
18602eb84d0SMichael S. Tsirkin {
18702eb84d0SMichael S. Tsirkin     PCIDevice *dev = opaque;
188d35e428cSAlex Williamson     int vector = addr / PCI_MSIX_ENTRY_SIZE;
189ae392c41SMichael S. Tsirkin     bool was_masked;
1909a93b617SMichael S. Tsirkin 
191ae392c41SMichael S. Tsirkin     was_masked = msix_is_masked(dev, vector);
192d35e428cSAlex Williamson     pci_set_long(dev->msix_table + addr, val);
193ae392c41SMichael S. Tsirkin     msix_handle_mask_update(dev, vector, was_masked);
19402eb84d0SMichael S. Tsirkin }
19502eb84d0SMichael S. Tsirkin 
196d35e428cSAlex Williamson static const MemoryRegionOps msix_table_mmio_ops = {
197d35e428cSAlex Williamson     .read = msix_table_mmio_read,
198d35e428cSAlex Williamson     .write = msix_table_mmio_write,
19968d1e1f5SAlexander Graf     .endianness = DEVICE_LITTLE_ENDIAN,
200d35e428cSAlex Williamson     .valid = {
201d35e428cSAlex Williamson         .min_access_size = 4,
202*191f90cbSMichael S. Tsirkin         .max_access_size = 8,
203*191f90cbSMichael S. Tsirkin     },
204*191f90cbSMichael S. Tsirkin     .impl = {
205d35e428cSAlex Williamson         .max_access_size = 4,
206d35e428cSAlex Williamson     },
207d35e428cSAlex Williamson };
208d35e428cSAlex Williamson 
209a8170e5eSAvi Kivity static uint64_t msix_pba_mmio_read(void *opaque, hwaddr addr,
210d35e428cSAlex Williamson                                    unsigned size)
211d35e428cSAlex Williamson {
212d35e428cSAlex Williamson     PCIDevice *dev = opaque;
213bbef882cSMichael S. Tsirkin     if (dev->msix_vector_poll_notifier) {
214bbef882cSMichael S. Tsirkin         unsigned vector_start = addr * 8;
215bbef882cSMichael S. Tsirkin         unsigned vector_end = MIN(addr + size * 8, dev->msix_entries_nr);
216bbef882cSMichael S. Tsirkin         dev->msix_vector_poll_notifier(dev, vector_start, vector_end);
217bbef882cSMichael S. Tsirkin     }
218d35e428cSAlex Williamson 
219d35e428cSAlex Williamson     return pci_get_long(dev->msix_pba + addr);
220d35e428cSAlex Williamson }
221d35e428cSAlex Williamson 
22243b11a91SMarc-André Lureau static void msix_pba_mmio_write(void *opaque, hwaddr addr,
22343b11a91SMarc-André Lureau                                 uint64_t val, unsigned size)
22443b11a91SMarc-André Lureau {
22543b11a91SMarc-André Lureau }
22643b11a91SMarc-André Lureau 
227d35e428cSAlex Williamson static const MemoryRegionOps msix_pba_mmio_ops = {
228d35e428cSAlex Williamson     .read = msix_pba_mmio_read,
22943b11a91SMarc-André Lureau     .write = msix_pba_mmio_write,
23068d1e1f5SAlexander Graf     .endianness = DEVICE_LITTLE_ENDIAN,
23195524ae8SAvi Kivity     .valid = {
23295524ae8SAvi Kivity         .min_access_size = 4,
233*191f90cbSMichael S. Tsirkin         .max_access_size = 8,
234*191f90cbSMichael S. Tsirkin     },
235*191f90cbSMichael S. Tsirkin     .impl = {
23695524ae8SAvi Kivity         .max_access_size = 4,
23795524ae8SAvi Kivity     },
23802eb84d0SMichael S. Tsirkin };
23902eb84d0SMichael S. Tsirkin 
240ae1be0bbSMichael S. Tsirkin static void msix_mask_all(struct PCIDevice *dev, unsigned nentries)
241ae1be0bbSMichael S. Tsirkin {
242ae1be0bbSMichael S. Tsirkin     int vector;
2435b5f1330SJan Kiszka 
244ae1be0bbSMichael S. Tsirkin     for (vector = 0; vector < nentries; ++vector) {
24501731cfbSJan Kiszka         unsigned offset =
24601731cfbSJan Kiszka             vector * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL;
2475b5f1330SJan Kiszka         bool was_masked = msix_is_masked(dev, vector);
2485b5f1330SJan Kiszka 
249d35e428cSAlex Williamson         dev->msix_table[offset] |= PCI_MSIX_ENTRY_CTRL_MASKBIT;
2505b5f1330SJan Kiszka         msix_handle_mask_update(dev, vector, was_masked);
251ae1be0bbSMichael S. Tsirkin     }
252ae1be0bbSMichael S. Tsirkin }
253ae1be0bbSMichael S. Tsirkin 
254ee640c62SCao jin /*
255ee640c62SCao jin  * Make PCI device @dev MSI-X capable
256ee640c62SCao jin  * @nentries is the max number of MSI-X vectors that the device support.
257ee640c62SCao jin  * @table_bar is the MemoryRegion that MSI-X table structure resides.
258ee640c62SCao jin  * @table_bar_nr is number of base address register corresponding to @table_bar.
259ee640c62SCao jin  * @table_offset indicates the offset that the MSI-X table structure starts with
260ee640c62SCao jin  * in @table_bar.
261ee640c62SCao jin  * @pba_bar is the MemoryRegion that the Pending Bit Array structure resides.
262ee640c62SCao jin  * @pba_bar_nr is number of base address register corresponding to @pba_bar.
263ee640c62SCao jin  * @pba_offset indicates the offset that the Pending Bit Array structure
264ee640c62SCao jin  * starts with in @pba_bar.
265ee640c62SCao jin  * Non-zero @cap_pos puts capability MSI-X at that offset in PCI config space.
266ee640c62SCao jin  * @errp is for returning errors.
267ee640c62SCao jin  *
268ee640c62SCao jin  * Return 0 on success; set @errp and return -errno on error:
269ee640c62SCao jin  * -ENOTSUP means lacking msi support for a msi-capable platform.
270ee640c62SCao jin  * -EINVAL means capability overlap, happens when @cap_pos is non-zero,
271ee640c62SCao jin  * also means a programming error, except device assignment, which can check
272ee640c62SCao jin  * if a real HW is broken.
273ee640c62SCao jin  */
27402eb84d0SMichael S. Tsirkin int msix_init(struct PCIDevice *dev, unsigned short nentries,
2755a2c2029SAlex Williamson               MemoryRegion *table_bar, uint8_t table_bar_nr,
2765a2c2029SAlex Williamson               unsigned table_offset, MemoryRegion *pba_bar,
277ee640c62SCao jin               uint8_t pba_bar_nr, unsigned pba_offset, uint8_t cap_pos,
278ee640c62SCao jin               Error **errp)
27902eb84d0SMichael S. Tsirkin {
2805a2c2029SAlex Williamson     int cap;
281d35e428cSAlex Williamson     unsigned table_size, pba_size;
2825a2c2029SAlex Williamson     uint8_t *config;
28302eb84d0SMichael S. Tsirkin 
28460ba3cc2SJan Kiszka     /* Nothing to do if MSI is not supported by interrupt controller */
285226419d6SMichael S. Tsirkin     if (!msi_nonbroken) {
286ee640c62SCao jin         error_setg(errp, "MSI-X is not supported by interrupt controller");
28760ba3cc2SJan Kiszka         return -ENOTSUP;
28860ba3cc2SJan Kiszka     }
2895a2c2029SAlex Williamson 
2905a2c2029SAlex Williamson     if (nentries < 1 || nentries > PCI_MSIX_FLAGS_QSIZE + 1) {
291ee640c62SCao jin         error_setg(errp, "The number of MSI-X vectors is invalid");
29202eb84d0SMichael S. Tsirkin         return -EINVAL;
2935a2c2029SAlex Williamson     }
29402eb84d0SMichael S. Tsirkin 
295d35e428cSAlex Williamson     table_size = nentries * PCI_MSIX_ENTRY_SIZE;
296d35e428cSAlex Williamson     pba_size = QEMU_ALIGN_UP(nentries, 64) / 8;
297d35e428cSAlex Williamson 
2985a2c2029SAlex Williamson     /* Sanity test: table & pba don't overlap, fit within BARs, min aligned */
2995a2c2029SAlex Williamson     if ((table_bar_nr == pba_bar_nr &&
3005a2c2029SAlex Williamson          ranges_overlap(table_offset, table_size, pba_offset, pba_size)) ||
3015a2c2029SAlex Williamson         table_offset + table_size > memory_region_size(table_bar) ||
3025a2c2029SAlex Williamson         pba_offset + pba_size > memory_region_size(pba_bar) ||
3035a2c2029SAlex Williamson         (table_offset | pba_offset) & PCI_MSIX_FLAGS_BIRMASK) {
304ee640c62SCao jin         error_setg(errp, "table & pba overlap, or they don't fit in BARs,"
305ee640c62SCao jin                    " or don't align");
3065a2c2029SAlex Williamson         return -EINVAL;
3075a2c2029SAlex Williamson     }
3085a2c2029SAlex Williamson 
30927841278SMao Zhongyi     cap = pci_add_capability(dev, PCI_CAP_ID_MSIX,
310ee640c62SCao jin                               cap_pos, MSIX_CAP_LENGTH, errp);
3115a2c2029SAlex Williamson     if (cap < 0) {
3125a2c2029SAlex Williamson         return cap;
3135a2c2029SAlex Williamson     }
3145a2c2029SAlex Williamson 
3155a2c2029SAlex Williamson     dev->msix_cap = cap;
3165a2c2029SAlex Williamson     dev->cap_present |= QEMU_PCI_CAP_MSIX;
3175a2c2029SAlex Williamson     config = dev->config + cap;
3185a2c2029SAlex Williamson 
3195a2c2029SAlex Williamson     pci_set_word(config + PCI_MSIX_FLAGS, nentries - 1);
3205a2c2029SAlex Williamson     dev->msix_entries_nr = nentries;
3215a2c2029SAlex Williamson     dev->msix_function_masked = true;
3225a2c2029SAlex Williamson 
3235a2c2029SAlex Williamson     pci_set_long(config + PCI_MSIX_TABLE, table_offset | table_bar_nr);
3245a2c2029SAlex Williamson     pci_set_long(config + PCI_MSIX_PBA, pba_offset | pba_bar_nr);
3255a2c2029SAlex Williamson 
3265a2c2029SAlex Williamson     /* Make flags bit writable. */
3275a2c2029SAlex Williamson     dev->wmask[cap + MSIX_CONTROL_OFFSET] |= MSIX_ENABLE_MASK |
3285a2c2029SAlex Williamson                                              MSIX_MASKALL_MASK;
32902eb84d0SMichael S. Tsirkin 
330d35e428cSAlex Williamson     dev->msix_table = g_malloc0(table_size);
331d35e428cSAlex Williamson     dev->msix_pba = g_malloc0(pba_size);
3325a2c2029SAlex Williamson     dev->msix_entry_used = g_malloc0(nentries * sizeof *dev->msix_entry_used);
3335a2c2029SAlex Williamson 
334ae1be0bbSMichael S. Tsirkin     msix_mask_all(dev, nentries);
33502eb84d0SMichael S. Tsirkin 
33640c5dce9SPaolo Bonzini     memory_region_init_io(&dev->msix_table_mmio, OBJECT(dev), &msix_table_mmio_ops, dev,
337d35e428cSAlex Williamson                           "msix-table", table_size);
3385a2c2029SAlex Williamson     memory_region_add_subregion(table_bar, table_offset, &dev->msix_table_mmio);
33940c5dce9SPaolo Bonzini     memory_region_init_io(&dev->msix_pba_mmio, OBJECT(dev), &msix_pba_mmio_ops, dev,
340d35e428cSAlex Williamson                           "msix-pba", pba_size);
3415a2c2029SAlex Williamson     memory_region_add_subregion(pba_bar, pba_offset, &dev->msix_pba_mmio);
34202eb84d0SMichael S. Tsirkin 
34302eb84d0SMichael S. Tsirkin     return 0;
34402eb84d0SMichael S. Tsirkin }
34502eb84d0SMichael S. Tsirkin 
34653f94925SAlex Williamson int msix_init_exclusive_bar(PCIDevice *dev, unsigned short nentries,
347ee640c62SCao jin                             uint8_t bar_nr, Error **errp)
34853f94925SAlex Williamson {
34953f94925SAlex Williamson     int ret;
35053f94925SAlex Williamson     char *name;
351a0ccd212SJason Wang     uint32_t bar_size = 4096;
352a0ccd212SJason Wang     uint32_t bar_pba_offset = bar_size / 2;
35317323e8bSDongli Zhang     uint32_t bar_pba_size = QEMU_ALIGN_UP(nentries, 64) / 8;
35453f94925SAlex Williamson 
35553f94925SAlex Williamson     /*
35653f94925SAlex Williamson      * Migration compatibility dictates that this remains a 4k
35753f94925SAlex Williamson      * BAR with the vector table in the lower half and PBA in
358a0ccd212SJason Wang      * the upper half for nentries which is lower or equal to 128.
359a0ccd212SJason Wang      * No need to care about using more than 65 entries for legacy
360a0ccd212SJason Wang      * machine types who has at most 64 queues.
36153f94925SAlex Williamson      */
362a0ccd212SJason Wang     if (nentries * PCI_MSIX_ENTRY_SIZE > bar_pba_offset) {
363a0ccd212SJason Wang         bar_pba_offset = nentries * PCI_MSIX_ENTRY_SIZE;
364a0ccd212SJason Wang     }
36553f94925SAlex Williamson 
366a0ccd212SJason Wang     if (bar_pba_offset + bar_pba_size > 4096) {
367a0ccd212SJason Wang         bar_size = bar_pba_offset + bar_pba_size;
368a0ccd212SJason Wang     }
369a0ccd212SJason Wang 
3709bff5d81SPeter Maydell     bar_size = pow2ceil(bar_size);
37153f94925SAlex Williamson 
3725f893b4eSGerd Hoffmann     name = g_strdup_printf("%s-msix", dev->name);
373a0ccd212SJason Wang     memory_region_init(&dev->msix_exclusive_bar, OBJECT(dev), name, bar_size);
3745f893b4eSGerd Hoffmann     g_free(name);
37553f94925SAlex Williamson 
37653f94925SAlex Williamson     ret = msix_init(dev, nentries, &dev->msix_exclusive_bar, bar_nr,
377a0ccd212SJason Wang                     0, &dev->msix_exclusive_bar,
378a0ccd212SJason Wang                     bar_nr, bar_pba_offset,
379ee640c62SCao jin                     0, errp);
38053f94925SAlex Williamson     if (ret) {
38153f94925SAlex Williamson         return ret;
38253f94925SAlex Williamson     }
38353f94925SAlex Williamson 
38453f94925SAlex Williamson     pci_register_bar(dev, bar_nr, PCI_BASE_ADDRESS_SPACE_MEMORY,
38553f94925SAlex Williamson                      &dev->msix_exclusive_bar);
38653f94925SAlex Williamson 
38753f94925SAlex Williamson     return 0;
38853f94925SAlex Williamson }
38953f94925SAlex Williamson 
39098304c84SMichael S. Tsirkin static void msix_free_irq_entries(PCIDevice *dev)
39198304c84SMichael S. Tsirkin {
39298304c84SMichael S. Tsirkin     int vector;
39398304c84SMichael S. Tsirkin 
39498304c84SMichael S. Tsirkin     for (vector = 0; vector < dev->msix_entries_nr; ++vector) {
39598304c84SMichael S. Tsirkin         dev->msix_entry_used[vector] = 0;
39698304c84SMichael S. Tsirkin         msix_clr_pending(dev, vector);
39798304c84SMichael S. Tsirkin     }
39898304c84SMichael S. Tsirkin }
39998304c84SMichael S. Tsirkin 
4003cac001eSMichael S. Tsirkin static void msix_clear_all_vectors(PCIDevice *dev)
4013cac001eSMichael S. Tsirkin {
4023cac001eSMichael S. Tsirkin     int vector;
4033cac001eSMichael S. Tsirkin 
4043cac001eSMichael S. Tsirkin     for (vector = 0; vector < dev->msix_entries_nr; ++vector) {
4053cac001eSMichael S. Tsirkin         msix_clr_pending(dev, vector);
4063cac001eSMichael S. Tsirkin     }
4073cac001eSMichael S. Tsirkin }
4083cac001eSMichael S. Tsirkin 
40902eb84d0SMichael S. Tsirkin /* Clean up resources for the device. */
410572992eeSAlex Williamson void msix_uninit(PCIDevice *dev, MemoryRegion *table_bar, MemoryRegion *pba_bar)
41102eb84d0SMichael S. Tsirkin {
41244701ab7SJan Kiszka     if (!msix_present(dev)) {
413572992eeSAlex Williamson         return;
41444701ab7SJan Kiszka     }
41502eb84d0SMichael S. Tsirkin     pci_del_capability(dev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH);
41602eb84d0SMichael S. Tsirkin     dev->msix_cap = 0;
41702eb84d0SMichael S. Tsirkin     msix_free_irq_entries(dev);
41802eb84d0SMichael S. Tsirkin     dev->msix_entries_nr = 0;
4195a2c2029SAlex Williamson     memory_region_del_subregion(pba_bar, &dev->msix_pba_mmio);
420d35e428cSAlex Williamson     g_free(dev->msix_pba);
421d35e428cSAlex Williamson     dev->msix_pba = NULL;
4225a2c2029SAlex Williamson     memory_region_del_subregion(table_bar, &dev->msix_table_mmio);
423d35e428cSAlex Williamson     g_free(dev->msix_table);
424d35e428cSAlex Williamson     dev->msix_table = NULL;
4257267c094SAnthony Liguori     g_free(dev->msix_entry_used);
42602eb84d0SMichael S. Tsirkin     dev->msix_entry_used = NULL;
42702eb84d0SMichael S. Tsirkin     dev->cap_present &= ~QEMU_PCI_CAP_MSIX;
42802eb84d0SMichael S. Tsirkin }
42902eb84d0SMichael S. Tsirkin 
43053f94925SAlex Williamson void msix_uninit_exclusive_bar(PCIDevice *dev)
43153f94925SAlex Williamson {
43253f94925SAlex Williamson     if (msix_present(dev)) {
4335a2c2029SAlex Williamson         msix_uninit(dev, &dev->msix_exclusive_bar, &dev->msix_exclusive_bar);
43453f94925SAlex Williamson     }
43553f94925SAlex Williamson }
43653f94925SAlex Williamson 
43702eb84d0SMichael S. Tsirkin void msix_save(PCIDevice *dev, QEMUFile *f)
43802eb84d0SMichael S. Tsirkin {
4399a3e12c8SMichael S. Tsirkin     unsigned n = dev->msix_entries_nr;
4409a3e12c8SMichael S. Tsirkin 
44144701ab7SJan Kiszka     if (!msix_present(dev)) {
4429a3e12c8SMichael S. Tsirkin         return;
44372755a70SMichael S. Tsirkin     }
4449a3e12c8SMichael S. Tsirkin 
445d35e428cSAlex Williamson     qemu_put_buffer(f, dev->msix_table, n * PCI_MSIX_ENTRY_SIZE);
4460ef1efcfSMarc-André Lureau     qemu_put_buffer(f, dev->msix_pba, DIV_ROUND_UP(n, 8));
44702eb84d0SMichael S. Tsirkin }
44802eb84d0SMichael S. Tsirkin 
44902eb84d0SMichael S. Tsirkin /* Should be called after restoring the config space. */
45002eb84d0SMichael S. Tsirkin void msix_load(PCIDevice *dev, QEMUFile *f)
45102eb84d0SMichael S. Tsirkin {
45202eb84d0SMichael S. Tsirkin     unsigned n = dev->msix_entries_nr;
4532cdfe53cSJan Kiszka     unsigned int vector;
45402eb84d0SMichael S. Tsirkin 
45544701ab7SJan Kiszka     if (!msix_present(dev)) {
45602eb84d0SMichael S. Tsirkin         return;
45798846d73SBlue Swirl     }
45802eb84d0SMichael S. Tsirkin 
4593cac001eSMichael S. Tsirkin     msix_clear_all_vectors(dev);
460d35e428cSAlex Williamson     qemu_get_buffer(f, dev->msix_table, n * PCI_MSIX_ENTRY_SIZE);
4610ef1efcfSMarc-André Lureau     qemu_get_buffer(f, dev->msix_pba, DIV_ROUND_UP(n, 8));
46250322249SMichael S. Tsirkin     msix_update_function_masked(dev);
4632cdfe53cSJan Kiszka 
4642cdfe53cSJan Kiszka     for (vector = 0; vector < n; vector++) {
4652cdfe53cSJan Kiszka         msix_handle_mask_update(dev, vector, true);
4662cdfe53cSJan Kiszka     }
46702eb84d0SMichael S. Tsirkin }
46802eb84d0SMichael S. Tsirkin 
46902eb84d0SMichael S. Tsirkin /* Does device support MSI-X? */
47002eb84d0SMichael S. Tsirkin int msix_present(PCIDevice *dev)
47102eb84d0SMichael S. Tsirkin {
47202eb84d0SMichael S. Tsirkin     return dev->cap_present & QEMU_PCI_CAP_MSIX;
47302eb84d0SMichael S. Tsirkin }
47402eb84d0SMichael S. Tsirkin 
47502eb84d0SMichael S. Tsirkin /* Is MSI-X enabled? */
47602eb84d0SMichael S. Tsirkin int msix_enabled(PCIDevice *dev)
47702eb84d0SMichael S. Tsirkin {
47802eb84d0SMichael S. Tsirkin     return (dev->cap_present & QEMU_PCI_CAP_MSIX) &&
4792760952bSMichael S. Tsirkin         (dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] &
48002eb84d0SMichael S. Tsirkin          MSIX_ENABLE_MASK);
48102eb84d0SMichael S. Tsirkin }
48202eb84d0SMichael S. Tsirkin 
48302eb84d0SMichael S. Tsirkin /* Send an MSI-X message */
48402eb84d0SMichael S. Tsirkin void msix_notify(PCIDevice *dev, unsigned vector)
48502eb84d0SMichael S. Tsirkin {
486bc4caf49SJan Kiszka     MSIMessage msg;
48702eb84d0SMichael S. Tsirkin 
48893482436SCao jin     if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector]) {
48902eb84d0SMichael S. Tsirkin         return;
49093482436SCao jin     }
49193482436SCao jin 
49202eb84d0SMichael S. Tsirkin     if (msix_is_masked(dev, vector)) {
49302eb84d0SMichael S. Tsirkin         msix_set_pending(dev, vector);
49402eb84d0SMichael S. Tsirkin         return;
49502eb84d0SMichael S. Tsirkin     }
49602eb84d0SMichael S. Tsirkin 
497bc4caf49SJan Kiszka     msg = msix_get_message(dev, vector);
498bc4caf49SJan Kiszka 
49938d40ff1SPavel Fedin     msi_send_message(dev, msg);
50002eb84d0SMichael S. Tsirkin }
50102eb84d0SMichael S. Tsirkin 
50202eb84d0SMichael S. Tsirkin void msix_reset(PCIDevice *dev)
50302eb84d0SMichael S. Tsirkin {
50444701ab7SJan Kiszka     if (!msix_present(dev)) {
50502eb84d0SMichael S. Tsirkin         return;
50644701ab7SJan Kiszka     }
5073cac001eSMichael S. Tsirkin     msix_clear_all_vectors(dev);
5082760952bSMichael S. Tsirkin     dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] &=
5092760952bSMichael S. Tsirkin             ~dev->wmask[dev->msix_cap + MSIX_CONTROL_OFFSET];
510d35e428cSAlex Williamson     memset(dev->msix_table, 0, dev->msix_entries_nr * PCI_MSIX_ENTRY_SIZE);
511d35e428cSAlex Williamson     memset(dev->msix_pba, 0, QEMU_ALIGN_UP(dev->msix_entries_nr, 64) / 8);
512ae1be0bbSMichael S. Tsirkin     msix_mask_all(dev, dev->msix_entries_nr);
51302eb84d0SMichael S. Tsirkin }
51402eb84d0SMichael S. Tsirkin 
51502eb84d0SMichael S. Tsirkin /* PCI spec suggests that devices make it possible for software to configure
51602eb84d0SMichael S. Tsirkin  * less vectors than supported by the device, but does not specify a standard
51702eb84d0SMichael S. Tsirkin  * mechanism for devices to do so.
51802eb84d0SMichael S. Tsirkin  *
51902eb84d0SMichael S. Tsirkin  * We support this by asking devices to declare vectors software is going to
52002eb84d0SMichael S. Tsirkin  * actually use, and checking this on the notification path. Devices that
52102eb84d0SMichael S. Tsirkin  * don't want to follow the spec suggestion can declare all vectors as used. */
52202eb84d0SMichael S. Tsirkin 
52302eb84d0SMichael S. Tsirkin /* Mark vector as used. */
52402eb84d0SMichael S. Tsirkin int msix_vector_use(PCIDevice *dev, unsigned vector)
52502eb84d0SMichael S. Tsirkin {
52693482436SCao jin     if (vector >= dev->msix_entries_nr) {
52702eb84d0SMichael S. Tsirkin         return -EINVAL;
52893482436SCao jin     }
52993482436SCao jin 
53002eb84d0SMichael S. Tsirkin     dev->msix_entry_used[vector]++;
53102eb84d0SMichael S. Tsirkin     return 0;
53202eb84d0SMichael S. Tsirkin }
53302eb84d0SMichael S. Tsirkin 
53402eb84d0SMichael S. Tsirkin /* Mark vector as unused. */
53502eb84d0SMichael S. Tsirkin void msix_vector_unuse(PCIDevice *dev, unsigned vector)
53602eb84d0SMichael S. Tsirkin {
53798304c84SMichael S. Tsirkin     if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector]) {
53898304c84SMichael S. Tsirkin         return;
53998304c84SMichael S. Tsirkin     }
54098304c84SMichael S. Tsirkin     if (--dev->msix_entry_used[vector]) {
54198304c84SMichael S. Tsirkin         return;
54298304c84SMichael S. Tsirkin     }
54398304c84SMichael S. Tsirkin     msix_clr_pending(dev, vector);
54402eb84d0SMichael S. Tsirkin }
545b5f28bcaSMichael S. Tsirkin 
546b5f28bcaSMichael S. Tsirkin void msix_unuse_all_vectors(PCIDevice *dev)
547b5f28bcaSMichael S. Tsirkin {
54844701ab7SJan Kiszka     if (!msix_present(dev)) {
549b5f28bcaSMichael S. Tsirkin         return;
55044701ab7SJan Kiszka     }
551b5f28bcaSMichael S. Tsirkin     msix_free_irq_entries(dev);
552b5f28bcaSMichael S. Tsirkin }
5532cdfe53cSJan Kiszka 
554cb697aaaSJan Kiszka unsigned int msix_nr_vectors_allocated(const PCIDevice *dev)
555cb697aaaSJan Kiszka {
556cb697aaaSJan Kiszka     return dev->msix_entries_nr;
557cb697aaaSJan Kiszka }
558cb697aaaSJan Kiszka 
5592cdfe53cSJan Kiszka static int msix_set_notifier_for_vector(PCIDevice *dev, unsigned int vector)
5602cdfe53cSJan Kiszka {
5612cdfe53cSJan Kiszka     MSIMessage msg;
5622cdfe53cSJan Kiszka 
5632cdfe53cSJan Kiszka     if (msix_is_masked(dev, vector)) {
5642cdfe53cSJan Kiszka         return 0;
5652cdfe53cSJan Kiszka     }
5662cdfe53cSJan Kiszka     msg = msix_get_message(dev, vector);
5672cdfe53cSJan Kiszka     return dev->msix_vector_use_notifier(dev, vector, msg);
5682cdfe53cSJan Kiszka }
5692cdfe53cSJan Kiszka 
5702cdfe53cSJan Kiszka static void msix_unset_notifier_for_vector(PCIDevice *dev, unsigned int vector)
5712cdfe53cSJan Kiszka {
5722cdfe53cSJan Kiszka     if (msix_is_masked(dev, vector)) {
5732cdfe53cSJan Kiszka         return;
5742cdfe53cSJan Kiszka     }
5752cdfe53cSJan Kiszka     dev->msix_vector_release_notifier(dev, vector);
5762cdfe53cSJan Kiszka }
5772cdfe53cSJan Kiszka 
5782cdfe53cSJan Kiszka int msix_set_vector_notifiers(PCIDevice *dev,
5792cdfe53cSJan Kiszka                               MSIVectorUseNotifier use_notifier,
580bbef882cSMichael S. Tsirkin                               MSIVectorReleaseNotifier release_notifier,
581bbef882cSMichael S. Tsirkin                               MSIVectorPollNotifier poll_notifier)
5822cdfe53cSJan Kiszka {
5832cdfe53cSJan Kiszka     int vector, ret;
5842cdfe53cSJan Kiszka 
5852cdfe53cSJan Kiszka     assert(use_notifier && release_notifier);
5862cdfe53cSJan Kiszka 
5872cdfe53cSJan Kiszka     dev->msix_vector_use_notifier = use_notifier;
5882cdfe53cSJan Kiszka     dev->msix_vector_release_notifier = release_notifier;
589bbef882cSMichael S. Tsirkin     dev->msix_vector_poll_notifier = poll_notifier;
5902cdfe53cSJan Kiszka 
5912cdfe53cSJan Kiszka     if ((dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] &
5922cdfe53cSJan Kiszka         (MSIX_ENABLE_MASK | MSIX_MASKALL_MASK)) == MSIX_ENABLE_MASK) {
5932cdfe53cSJan Kiszka         for (vector = 0; vector < dev->msix_entries_nr; vector++) {
5942cdfe53cSJan Kiszka             ret = msix_set_notifier_for_vector(dev, vector);
5952cdfe53cSJan Kiszka             if (ret < 0) {
5962cdfe53cSJan Kiszka                 goto undo;
5972cdfe53cSJan Kiszka             }
5982cdfe53cSJan Kiszka         }
5992cdfe53cSJan Kiszka     }
600bbef882cSMichael S. Tsirkin     if (dev->msix_vector_poll_notifier) {
601bbef882cSMichael S. Tsirkin         dev->msix_vector_poll_notifier(dev, 0, dev->msix_entries_nr);
602bbef882cSMichael S. Tsirkin     }
6032cdfe53cSJan Kiszka     return 0;
6042cdfe53cSJan Kiszka 
6052cdfe53cSJan Kiszka undo:
6062cdfe53cSJan Kiszka     while (--vector >= 0) {
6072cdfe53cSJan Kiszka         msix_unset_notifier_for_vector(dev, vector);
6082cdfe53cSJan Kiszka     }
6092cdfe53cSJan Kiszka     dev->msix_vector_use_notifier = NULL;
6102cdfe53cSJan Kiszka     dev->msix_vector_release_notifier = NULL;
6112cdfe53cSJan Kiszka     return ret;
6122cdfe53cSJan Kiszka }
6132cdfe53cSJan Kiszka 
6142cdfe53cSJan Kiszka void msix_unset_vector_notifiers(PCIDevice *dev)
6152cdfe53cSJan Kiszka {
6162cdfe53cSJan Kiszka     int vector;
6172cdfe53cSJan Kiszka 
6182cdfe53cSJan Kiszka     assert(dev->msix_vector_use_notifier &&
6192cdfe53cSJan Kiszka            dev->msix_vector_release_notifier);
6202cdfe53cSJan Kiszka 
6212cdfe53cSJan Kiszka     if ((dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] &
6222cdfe53cSJan Kiszka         (MSIX_ENABLE_MASK | MSIX_MASKALL_MASK)) == MSIX_ENABLE_MASK) {
6232cdfe53cSJan Kiszka         for (vector = 0; vector < dev->msix_entries_nr; vector++) {
6242cdfe53cSJan Kiszka             msix_unset_notifier_for_vector(dev, vector);
6252cdfe53cSJan Kiszka         }
6262cdfe53cSJan Kiszka     }
6272cdfe53cSJan Kiszka     dev->msix_vector_use_notifier = NULL;
6282cdfe53cSJan Kiszka     dev->msix_vector_release_notifier = NULL;
629bbef882cSMichael S. Tsirkin     dev->msix_vector_poll_notifier = NULL;
6302cdfe53cSJan Kiszka }
631340b50c7SGerd Hoffmann 
6322c21ee76SJianjun Duan static int put_msix_state(QEMUFile *f, void *pv, size_t size,
63303fee66fSMarc-André Lureau                           const VMStateField *field, QJSON *vmdesc)
634340b50c7SGerd Hoffmann {
635340b50c7SGerd Hoffmann     msix_save(pv, f);
6362c21ee76SJianjun Duan 
6372c21ee76SJianjun Duan     return 0;
638340b50c7SGerd Hoffmann }
639340b50c7SGerd Hoffmann 
6402c21ee76SJianjun Duan static int get_msix_state(QEMUFile *f, void *pv, size_t size,
64103fee66fSMarc-André Lureau                           const VMStateField *field)
642340b50c7SGerd Hoffmann {
643340b50c7SGerd Hoffmann     msix_load(pv, f);
644340b50c7SGerd Hoffmann     return 0;
645340b50c7SGerd Hoffmann }
646340b50c7SGerd Hoffmann 
647340b50c7SGerd Hoffmann static VMStateInfo vmstate_info_msix = {
648340b50c7SGerd Hoffmann     .name = "msix state",
649340b50c7SGerd Hoffmann     .get  = get_msix_state,
650340b50c7SGerd Hoffmann     .put  = put_msix_state,
651340b50c7SGerd Hoffmann };
652340b50c7SGerd Hoffmann 
653340b50c7SGerd Hoffmann const VMStateDescription vmstate_msix = {
654340b50c7SGerd Hoffmann     .name = "msix",
655340b50c7SGerd Hoffmann     .fields = (VMStateField[]) {
656340b50c7SGerd Hoffmann         {
657340b50c7SGerd Hoffmann             .name         = "msix",
658340b50c7SGerd Hoffmann             .version_id   = 0,
659340b50c7SGerd Hoffmann             .field_exists = NULL,
660340b50c7SGerd Hoffmann             .size         = 0,   /* ouch */
661340b50c7SGerd Hoffmann             .info         = &vmstate_info_msix,
662340b50c7SGerd Hoffmann             .flags        = VMS_SINGLE,
663340b50c7SGerd Hoffmann             .offset       = 0,
664340b50c7SGerd Hoffmann         },
665340b50c7SGerd Hoffmann         VMSTATE_END_OF_LIST()
666340b50c7SGerd Hoffmann     }
667340b50c7SGerd Hoffmann };
668