xref: /qemu/hw/pci/msix.c (revision ca77ee28e03fb5052be01b9237b0a7cef5d90d4a)
102eb84d0SMichael S. Tsirkin /*
202eb84d0SMichael S. Tsirkin  * MSI-X device support
302eb84d0SMichael S. Tsirkin  *
402eb84d0SMichael S. Tsirkin  * This module includes support for MSI-X in pci devices.
502eb84d0SMichael S. Tsirkin  *
602eb84d0SMichael S. Tsirkin  * Author: Michael S. Tsirkin <mst@redhat.com>
702eb84d0SMichael S. Tsirkin  *
802eb84d0SMichael S. Tsirkin  *  Copyright (c) 2009, Red Hat Inc, Michael S. Tsirkin (mst@redhat.com)
902eb84d0SMichael S. Tsirkin  *
1002eb84d0SMichael S. Tsirkin  * This work is licensed under the terms of the GNU GPL, version 2.  See
1102eb84d0SMichael S. Tsirkin  * the COPYING file in the top-level directory.
126b620ca3SPaolo Bonzini  *
136b620ca3SPaolo Bonzini  * Contributions after 2012-01-13 are licensed under the terms of the
146b620ca3SPaolo Bonzini  * GNU GPL, version 2 or (at your option) any later version.
1502eb84d0SMichael S. Tsirkin  */
1602eb84d0SMichael S. Tsirkin 
1797d5408fSPeter Maydell #include "qemu/osdep.h"
18c759b24fSMichael S. Tsirkin #include "hw/hw.h"
19c759b24fSMichael S. Tsirkin #include "hw/pci/msi.h"
20c759b24fSMichael S. Tsirkin #include "hw/pci/msix.h"
21c759b24fSMichael S. Tsirkin #include "hw/pci/pci.h"
22428c3eceSStefano Stabellini #include "hw/xen/xen.h"
23*ca77ee28SMarkus Armbruster #include "migration/qemu-file-types.h"
241de7afc9SPaolo Bonzini #include "qemu/range.h"
25ee640c62SCao jin #include "qapi/error.h"
26993b1f4bSPeter Xu #include "trace.h"
2702eb84d0SMichael S. Tsirkin 
/*
 * The MSI-X enable and mask-all bits sit in the upper byte of the 16-bit
 * FLAGS register.  They are therefore accessed through byte 1 of FLAGS,
 * using the flag constants shifted down by 8 bits.
 */
#define MSIX_CONTROL_OFFSET (PCI_MSIX_FLAGS + 1)
#define MSIX_ENABLE_MASK (PCI_MSIX_FLAGS_ENABLE >> 8)
#define MSIX_MASKALL_MASK (PCI_MSIX_FLAGS_MASKALL >> 8)
3202eb84d0SMichael S. Tsirkin 
334c93bfa9SMichael S. Tsirkin MSIMessage msix_get_message(PCIDevice *dev, unsigned vector)
34bc4caf49SJan Kiszka {
35d35e428cSAlex Williamson     uint8_t *table_entry = dev->msix_table + vector * PCI_MSIX_ENTRY_SIZE;
36bc4caf49SJan Kiszka     MSIMessage msg;
37bc4caf49SJan Kiszka 
38bc4caf49SJan Kiszka     msg.address = pci_get_quad(table_entry + PCI_MSIX_ENTRY_LOWER_ADDR);
39bc4caf49SJan Kiszka     msg.data = pci_get_long(table_entry + PCI_MSIX_ENTRY_DATA);
40bc4caf49SJan Kiszka     return msg;
41bc4caf49SJan Kiszka }
4202eb84d0SMichael S. Tsirkin 
43932d4a42SAlexey Kardashevskiy /*
44932d4a42SAlexey Kardashevskiy  * Special API for POWER to configure the vectors through
45932d4a42SAlexey Kardashevskiy  * a side channel. Should never be used by devices.
46932d4a42SAlexey Kardashevskiy  */
47932d4a42SAlexey Kardashevskiy void msix_set_message(PCIDevice *dev, int vector, struct MSIMessage msg)
48932d4a42SAlexey Kardashevskiy {
49932d4a42SAlexey Kardashevskiy     uint8_t *table_entry = dev->msix_table + vector * PCI_MSIX_ENTRY_SIZE;
50932d4a42SAlexey Kardashevskiy 
51932d4a42SAlexey Kardashevskiy     pci_set_quad(table_entry + PCI_MSIX_ENTRY_LOWER_ADDR, msg.address);
52932d4a42SAlexey Kardashevskiy     pci_set_long(table_entry + PCI_MSIX_ENTRY_DATA, msg.data);
53932d4a42SAlexey Kardashevskiy     table_entry[PCI_MSIX_ENTRY_VECTOR_CTRL] &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT;
54932d4a42SAlexey Kardashevskiy }
55932d4a42SAlexey Kardashevskiy 
/* Bit mask selecting @vector's bit inside its byte of the pending array. */
static uint8_t msix_pending_mask(int vector)
{
    int bit = vector % 8;

    return 1 << bit;
}
6002eb84d0SMichael S. Tsirkin 
6102eb84d0SMichael S. Tsirkin static uint8_t *msix_pending_byte(PCIDevice *dev, int vector)
6202eb84d0SMichael S. Tsirkin {
63d35e428cSAlex Williamson     return dev->msix_pba + vector / 8;
6402eb84d0SMichael S. Tsirkin }
6502eb84d0SMichael S. Tsirkin 
6602eb84d0SMichael S. Tsirkin static int msix_is_pending(PCIDevice *dev, int vector)
6702eb84d0SMichael S. Tsirkin {
6802eb84d0SMichael S. Tsirkin     return *msix_pending_byte(dev, vector) & msix_pending_mask(vector);
6902eb84d0SMichael S. Tsirkin }
7002eb84d0SMichael S. Tsirkin 
7170f8ee39SMichael S. Tsirkin void msix_set_pending(PCIDevice *dev, unsigned int vector)
7202eb84d0SMichael S. Tsirkin {
7302eb84d0SMichael S. Tsirkin     *msix_pending_byte(dev, vector) |= msix_pending_mask(vector);
7402eb84d0SMichael S. Tsirkin }
7502eb84d0SMichael S. Tsirkin 
763bdfaabbSDmitry Fleytman void msix_clr_pending(PCIDevice *dev, int vector)
7702eb84d0SMichael S. Tsirkin {
7802eb84d0SMichael S. Tsirkin     *msix_pending_byte(dev, vector) &= ~msix_pending_mask(vector);
7902eb84d0SMichael S. Tsirkin }
8002eb84d0SMichael S. Tsirkin 
8170f8ee39SMichael S. Tsirkin static bool msix_vector_masked(PCIDevice *dev, unsigned int vector, bool fmask)
8202eb84d0SMichael S. Tsirkin {
83428c3eceSStefano Stabellini     unsigned offset = vector * PCI_MSIX_ENTRY_SIZE;
84e1e4bf22SMichael S. Tsirkin     uint8_t *data = &dev->msix_table[offset + PCI_MSIX_ENTRY_DATA];
85428c3eceSStefano Stabellini     /* MSIs on Xen can be remapped into pirqs. In those cases, masking
86428c3eceSStefano Stabellini      * and unmasking go through the PV evtchn path. */
87e1e4bf22SMichael S. Tsirkin     if (xen_enabled() && xen_is_pirq_msi(pci_get_long(data))) {
88428c3eceSStefano Stabellini         return false;
89428c3eceSStefano Stabellini     }
90428c3eceSStefano Stabellini     return fmask || dev->msix_table[offset + PCI_MSIX_ENTRY_VECTOR_CTRL] &
91428c3eceSStefano Stabellini         PCI_MSIX_ENTRY_CTRL_MASKBIT;
925b5cb086SMichael S. Tsirkin }
935b5cb086SMichael S. Tsirkin 
9470f8ee39SMichael S. Tsirkin bool msix_is_masked(PCIDevice *dev, unsigned int vector)
955b5cb086SMichael S. Tsirkin {
96ae392c41SMichael S. Tsirkin     return msix_vector_masked(dev, vector, dev->msix_function_masked);
97ae392c41SMichael S. Tsirkin }
98ae392c41SMichael S. Tsirkin 
992cdfe53cSJan Kiszka static void msix_fire_vector_notifier(PCIDevice *dev,
1002cdfe53cSJan Kiszka                                       unsigned int vector, bool is_masked)
1012cdfe53cSJan Kiszka {
1022cdfe53cSJan Kiszka     MSIMessage msg;
1032cdfe53cSJan Kiszka     int ret;
1042cdfe53cSJan Kiszka 
1052cdfe53cSJan Kiszka     if (!dev->msix_vector_use_notifier) {
1062cdfe53cSJan Kiszka         return;
1072cdfe53cSJan Kiszka     }
1082cdfe53cSJan Kiszka     if (is_masked) {
1092cdfe53cSJan Kiszka         dev->msix_vector_release_notifier(dev, vector);
1102cdfe53cSJan Kiszka     } else {
1112cdfe53cSJan Kiszka         msg = msix_get_message(dev, vector);
1122cdfe53cSJan Kiszka         ret = dev->msix_vector_use_notifier(dev, vector, msg);
1132cdfe53cSJan Kiszka         assert(ret >= 0);
1142cdfe53cSJan Kiszka     }
1152cdfe53cSJan Kiszka }
1162cdfe53cSJan Kiszka 
117ae392c41SMichael S. Tsirkin static void msix_handle_mask_update(PCIDevice *dev, int vector, bool was_masked)
118ae392c41SMichael S. Tsirkin {
119ae392c41SMichael S. Tsirkin     bool is_masked = msix_is_masked(dev, vector);
1202cdfe53cSJan Kiszka 
121ae392c41SMichael S. Tsirkin     if (is_masked == was_masked) {
122ae392c41SMichael S. Tsirkin         return;
123ae392c41SMichael S. Tsirkin     }
124ae392c41SMichael S. Tsirkin 
1252cdfe53cSJan Kiszka     msix_fire_vector_notifier(dev, vector, is_masked);
1262cdfe53cSJan Kiszka 
127ae392c41SMichael S. Tsirkin     if (!is_masked && msix_is_pending(dev, vector)) {
1285b5cb086SMichael S. Tsirkin         msix_clr_pending(dev, vector);
1295b5cb086SMichael S. Tsirkin         msix_notify(dev, vector);
1305b5cb086SMichael S. Tsirkin     }
1315b5cb086SMichael S. Tsirkin }
1325b5cb086SMichael S. Tsirkin 
133993b1f4bSPeter Xu static bool msix_masked(PCIDevice *dev)
134993b1f4bSPeter Xu {
135993b1f4bSPeter Xu     return dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] & MSIX_MASKALL_MASK;
136993b1f4bSPeter Xu }
137993b1f4bSPeter Xu 
13850322249SMichael S. Tsirkin static void msix_update_function_masked(PCIDevice *dev)
13950322249SMichael S. Tsirkin {
140993b1f4bSPeter Xu     dev->msix_function_masked = !msix_enabled(dev) || msix_masked(dev);
14150322249SMichael S. Tsirkin }
14250322249SMichael S. Tsirkin 
1435b5cb086SMichael S. Tsirkin /* Handle MSI-X capability config write. */
1445b5cb086SMichael S. Tsirkin void msix_write_config(PCIDevice *dev, uint32_t addr,
1455b5cb086SMichael S. Tsirkin                        uint32_t val, int len)
1465b5cb086SMichael S. Tsirkin {
1475b5cb086SMichael S. Tsirkin     unsigned enable_pos = dev->msix_cap + MSIX_CONTROL_OFFSET;
1485b5cb086SMichael S. Tsirkin     int vector;
14950322249SMichael S. Tsirkin     bool was_masked;
1505b5cb086SMichael S. Tsirkin 
1517c9958b0SJan Kiszka     if (!msix_present(dev) || !range_covers_byte(addr, len, enable_pos)) {
1525b5cb086SMichael S. Tsirkin         return;
1535b5cb086SMichael S. Tsirkin     }
1545b5cb086SMichael S. Tsirkin 
155993b1f4bSPeter Xu     trace_msix_write_config(dev->name, msix_enabled(dev), msix_masked(dev));
156993b1f4bSPeter Xu 
15750322249SMichael S. Tsirkin     was_masked = dev->msix_function_masked;
15850322249SMichael S. Tsirkin     msix_update_function_masked(dev);
15950322249SMichael S. Tsirkin 
1605b5cb086SMichael S. Tsirkin     if (!msix_enabled(dev)) {
1615b5cb086SMichael S. Tsirkin         return;
1625b5cb086SMichael S. Tsirkin     }
1635b5cb086SMichael S. Tsirkin 
164e407bf13SIsaku Yamahata     pci_device_deassert_intx(dev);
1655b5cb086SMichael S. Tsirkin 
16650322249SMichael S. Tsirkin     if (dev->msix_function_masked == was_masked) {
1675b5cb086SMichael S. Tsirkin         return;
1685b5cb086SMichael S. Tsirkin     }
1695b5cb086SMichael S. Tsirkin 
1705b5cb086SMichael S. Tsirkin     for (vector = 0; vector < dev->msix_entries_nr; ++vector) {
171ae392c41SMichael S. Tsirkin         msix_handle_mask_update(dev, vector,
172ae392c41SMichael S. Tsirkin                                 msix_vector_masked(dev, vector, was_masked));
1735b5cb086SMichael S. Tsirkin     }
17402eb84d0SMichael S. Tsirkin }
17502eb84d0SMichael S. Tsirkin 
176a8170e5eSAvi Kivity static uint64_t msix_table_mmio_read(void *opaque, hwaddr addr,
177eebcb0a7SAlex Williamson                                      unsigned size)
178eebcb0a7SAlex Williamson {
179eebcb0a7SAlex Williamson     PCIDevice *dev = opaque;
180eebcb0a7SAlex Williamson 
181d35e428cSAlex Williamson     return pci_get_long(dev->msix_table + addr);
182eebcb0a7SAlex Williamson }
183eebcb0a7SAlex Williamson 
184a8170e5eSAvi Kivity static void msix_table_mmio_write(void *opaque, hwaddr addr,
18595524ae8SAvi Kivity                                   uint64_t val, unsigned size)
18602eb84d0SMichael S. Tsirkin {
18702eb84d0SMichael S. Tsirkin     PCIDevice *dev = opaque;
188d35e428cSAlex Williamson     int vector = addr / PCI_MSIX_ENTRY_SIZE;
189ae392c41SMichael S. Tsirkin     bool was_masked;
1909a93b617SMichael S. Tsirkin 
191ae392c41SMichael S. Tsirkin     was_masked = msix_is_masked(dev, vector);
192d35e428cSAlex Williamson     pci_set_long(dev->msix_table + addr, val);
193ae392c41SMichael S. Tsirkin     msix_handle_mask_update(dev, vector, was_masked);
19402eb84d0SMichael S. Tsirkin }
19502eb84d0SMichael S. Tsirkin 
196d35e428cSAlex Williamson static const MemoryRegionOps msix_table_mmio_ops = {
197d35e428cSAlex Williamson     .read = msix_table_mmio_read,
198d35e428cSAlex Williamson     .write = msix_table_mmio_write,
19968d1e1f5SAlexander Graf     .endianness = DEVICE_LITTLE_ENDIAN,
200d35e428cSAlex Williamson     .valid = {
201d35e428cSAlex Williamson         .min_access_size = 4,
202d35e428cSAlex Williamson         .max_access_size = 4,
203d35e428cSAlex Williamson     },
204d35e428cSAlex Williamson };
205d35e428cSAlex Williamson 
206a8170e5eSAvi Kivity static uint64_t msix_pba_mmio_read(void *opaque, hwaddr addr,
207d35e428cSAlex Williamson                                    unsigned size)
208d35e428cSAlex Williamson {
209d35e428cSAlex Williamson     PCIDevice *dev = opaque;
210bbef882cSMichael S. Tsirkin     if (dev->msix_vector_poll_notifier) {
211bbef882cSMichael S. Tsirkin         unsigned vector_start = addr * 8;
212bbef882cSMichael S. Tsirkin         unsigned vector_end = MIN(addr + size * 8, dev->msix_entries_nr);
213bbef882cSMichael S. Tsirkin         dev->msix_vector_poll_notifier(dev, vector_start, vector_end);
214bbef882cSMichael S. Tsirkin     }
215d35e428cSAlex Williamson 
216d35e428cSAlex Williamson     return pci_get_long(dev->msix_pba + addr);
217d35e428cSAlex Williamson }
218d35e428cSAlex Williamson 
21943b11a91SMarc-André Lureau static void msix_pba_mmio_write(void *opaque, hwaddr addr,
22043b11a91SMarc-André Lureau                                 uint64_t val, unsigned size)
22143b11a91SMarc-André Lureau {
22243b11a91SMarc-André Lureau }
22343b11a91SMarc-André Lureau 
224d35e428cSAlex Williamson static const MemoryRegionOps msix_pba_mmio_ops = {
225d35e428cSAlex Williamson     .read = msix_pba_mmio_read,
22643b11a91SMarc-André Lureau     .write = msix_pba_mmio_write,
22768d1e1f5SAlexander Graf     .endianness = DEVICE_LITTLE_ENDIAN,
22895524ae8SAvi Kivity     .valid = {
22995524ae8SAvi Kivity         .min_access_size = 4,
23095524ae8SAvi Kivity         .max_access_size = 4,
23195524ae8SAvi Kivity     },
23202eb84d0SMichael S. Tsirkin };
23302eb84d0SMichael S. Tsirkin 
234ae1be0bbSMichael S. Tsirkin static void msix_mask_all(struct PCIDevice *dev, unsigned nentries)
235ae1be0bbSMichael S. Tsirkin {
236ae1be0bbSMichael S. Tsirkin     int vector;
2375b5f1330SJan Kiszka 
238ae1be0bbSMichael S. Tsirkin     for (vector = 0; vector < nentries; ++vector) {
23901731cfbSJan Kiszka         unsigned offset =
24001731cfbSJan Kiszka             vector * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL;
2415b5f1330SJan Kiszka         bool was_masked = msix_is_masked(dev, vector);
2425b5f1330SJan Kiszka 
243d35e428cSAlex Williamson         dev->msix_table[offset] |= PCI_MSIX_ENTRY_CTRL_MASKBIT;
2445b5f1330SJan Kiszka         msix_handle_mask_update(dev, vector, was_masked);
245ae1be0bbSMichael S. Tsirkin     }
246ae1be0bbSMichael S. Tsirkin }
247ae1be0bbSMichael S. Tsirkin 
248ee640c62SCao jin /*
249ee640c62SCao jin  * Make PCI device @dev MSI-X capable
250ee640c62SCao jin  * @nentries is the max number of MSI-X vectors that the device support.
251ee640c62SCao jin  * @table_bar is the MemoryRegion that MSI-X table structure resides.
252ee640c62SCao jin  * @table_bar_nr is number of base address register corresponding to @table_bar.
253ee640c62SCao jin  * @table_offset indicates the offset that the MSI-X table structure starts with
254ee640c62SCao jin  * in @table_bar.
255ee640c62SCao jin  * @pba_bar is the MemoryRegion that the Pending Bit Array structure resides.
256ee640c62SCao jin  * @pba_bar_nr is number of base address register corresponding to @pba_bar.
257ee640c62SCao jin  * @pba_offset indicates the offset that the Pending Bit Array structure
258ee640c62SCao jin  * starts with in @pba_bar.
259ee640c62SCao jin  * Non-zero @cap_pos puts capability MSI-X at that offset in PCI config space.
260ee640c62SCao jin  * @errp is for returning errors.
261ee640c62SCao jin  *
262ee640c62SCao jin  * Return 0 on success; set @errp and return -errno on error:
263ee640c62SCao jin  * -ENOTSUP means lacking msi support for a msi-capable platform.
264ee640c62SCao jin  * -EINVAL means capability overlap, happens when @cap_pos is non-zero,
265ee640c62SCao jin  * also means a programming error, except device assignment, which can check
266ee640c62SCao jin  * if a real HW is broken.
267ee640c62SCao jin  */
26802eb84d0SMichael S. Tsirkin int msix_init(struct PCIDevice *dev, unsigned short nentries,
2695a2c2029SAlex Williamson               MemoryRegion *table_bar, uint8_t table_bar_nr,
2705a2c2029SAlex Williamson               unsigned table_offset, MemoryRegion *pba_bar,
271ee640c62SCao jin               uint8_t pba_bar_nr, unsigned pba_offset, uint8_t cap_pos,
272ee640c62SCao jin               Error **errp)
27302eb84d0SMichael S. Tsirkin {
2745a2c2029SAlex Williamson     int cap;
275d35e428cSAlex Williamson     unsigned table_size, pba_size;
2765a2c2029SAlex Williamson     uint8_t *config;
27702eb84d0SMichael S. Tsirkin 
27860ba3cc2SJan Kiszka     /* Nothing to do if MSI is not supported by interrupt controller */
279226419d6SMichael S. Tsirkin     if (!msi_nonbroken) {
280ee640c62SCao jin         error_setg(errp, "MSI-X is not supported by interrupt controller");
28160ba3cc2SJan Kiszka         return -ENOTSUP;
28260ba3cc2SJan Kiszka     }
2835a2c2029SAlex Williamson 
2845a2c2029SAlex Williamson     if (nentries < 1 || nentries > PCI_MSIX_FLAGS_QSIZE + 1) {
285ee640c62SCao jin         error_setg(errp, "The number of MSI-X vectors is invalid");
28602eb84d0SMichael S. Tsirkin         return -EINVAL;
2875a2c2029SAlex Williamson     }
28802eb84d0SMichael S. Tsirkin 
289d35e428cSAlex Williamson     table_size = nentries * PCI_MSIX_ENTRY_SIZE;
290d35e428cSAlex Williamson     pba_size = QEMU_ALIGN_UP(nentries, 64) / 8;
291d35e428cSAlex Williamson 
2925a2c2029SAlex Williamson     /* Sanity test: table & pba don't overlap, fit within BARs, min aligned */
2935a2c2029SAlex Williamson     if ((table_bar_nr == pba_bar_nr &&
2945a2c2029SAlex Williamson          ranges_overlap(table_offset, table_size, pba_offset, pba_size)) ||
2955a2c2029SAlex Williamson         table_offset + table_size > memory_region_size(table_bar) ||
2965a2c2029SAlex Williamson         pba_offset + pba_size > memory_region_size(pba_bar) ||
2975a2c2029SAlex Williamson         (table_offset | pba_offset) & PCI_MSIX_FLAGS_BIRMASK) {
298ee640c62SCao jin         error_setg(errp, "table & pba overlap, or they don't fit in BARs,"
299ee640c62SCao jin                    " or don't align");
3005a2c2029SAlex Williamson         return -EINVAL;
3015a2c2029SAlex Williamson     }
3025a2c2029SAlex Williamson 
30327841278SMao Zhongyi     cap = pci_add_capability(dev, PCI_CAP_ID_MSIX,
304ee640c62SCao jin                               cap_pos, MSIX_CAP_LENGTH, errp);
3055a2c2029SAlex Williamson     if (cap < 0) {
3065a2c2029SAlex Williamson         return cap;
3075a2c2029SAlex Williamson     }
3085a2c2029SAlex Williamson 
3095a2c2029SAlex Williamson     dev->msix_cap = cap;
3105a2c2029SAlex Williamson     dev->cap_present |= QEMU_PCI_CAP_MSIX;
3115a2c2029SAlex Williamson     config = dev->config + cap;
3125a2c2029SAlex Williamson 
3135a2c2029SAlex Williamson     pci_set_word(config + PCI_MSIX_FLAGS, nentries - 1);
3145a2c2029SAlex Williamson     dev->msix_entries_nr = nentries;
3155a2c2029SAlex Williamson     dev->msix_function_masked = true;
3165a2c2029SAlex Williamson 
3175a2c2029SAlex Williamson     pci_set_long(config + PCI_MSIX_TABLE, table_offset | table_bar_nr);
3185a2c2029SAlex Williamson     pci_set_long(config + PCI_MSIX_PBA, pba_offset | pba_bar_nr);
3195a2c2029SAlex Williamson 
3205a2c2029SAlex Williamson     /* Make flags bit writable. */
3215a2c2029SAlex Williamson     dev->wmask[cap + MSIX_CONTROL_OFFSET] |= MSIX_ENABLE_MASK |
3225a2c2029SAlex Williamson                                              MSIX_MASKALL_MASK;
32302eb84d0SMichael S. Tsirkin 
324d35e428cSAlex Williamson     dev->msix_table = g_malloc0(table_size);
325d35e428cSAlex Williamson     dev->msix_pba = g_malloc0(pba_size);
3265a2c2029SAlex Williamson     dev->msix_entry_used = g_malloc0(nentries * sizeof *dev->msix_entry_used);
3275a2c2029SAlex Williamson 
328ae1be0bbSMichael S. Tsirkin     msix_mask_all(dev, nentries);
32902eb84d0SMichael S. Tsirkin 
33040c5dce9SPaolo Bonzini     memory_region_init_io(&dev->msix_table_mmio, OBJECT(dev), &msix_table_mmio_ops, dev,
331d35e428cSAlex Williamson                           "msix-table", table_size);
3325a2c2029SAlex Williamson     memory_region_add_subregion(table_bar, table_offset, &dev->msix_table_mmio);
33340c5dce9SPaolo Bonzini     memory_region_init_io(&dev->msix_pba_mmio, OBJECT(dev), &msix_pba_mmio_ops, dev,
334d35e428cSAlex Williamson                           "msix-pba", pba_size);
3355a2c2029SAlex Williamson     memory_region_add_subregion(pba_bar, pba_offset, &dev->msix_pba_mmio);
33602eb84d0SMichael S. Tsirkin 
33702eb84d0SMichael S. Tsirkin     return 0;
33802eb84d0SMichael S. Tsirkin }
33902eb84d0SMichael S. Tsirkin 
34053f94925SAlex Williamson int msix_init_exclusive_bar(PCIDevice *dev, unsigned short nentries,
341ee640c62SCao jin                             uint8_t bar_nr, Error **errp)
34253f94925SAlex Williamson {
34353f94925SAlex Williamson     int ret;
34453f94925SAlex Williamson     char *name;
345a0ccd212SJason Wang     uint32_t bar_size = 4096;
346a0ccd212SJason Wang     uint32_t bar_pba_offset = bar_size / 2;
34717323e8bSDongli Zhang     uint32_t bar_pba_size = QEMU_ALIGN_UP(nentries, 64) / 8;
34853f94925SAlex Williamson 
34953f94925SAlex Williamson     /*
35053f94925SAlex Williamson      * Migration compatibility dictates that this remains a 4k
35153f94925SAlex Williamson      * BAR with the vector table in the lower half and PBA in
352a0ccd212SJason Wang      * the upper half for nentries which is lower or equal to 128.
353a0ccd212SJason Wang      * No need to care about using more than 65 entries for legacy
354a0ccd212SJason Wang      * machine types who has at most 64 queues.
35553f94925SAlex Williamson      */
356a0ccd212SJason Wang     if (nentries * PCI_MSIX_ENTRY_SIZE > bar_pba_offset) {
357a0ccd212SJason Wang         bar_pba_offset = nentries * PCI_MSIX_ENTRY_SIZE;
358a0ccd212SJason Wang     }
35953f94925SAlex Williamson 
360a0ccd212SJason Wang     if (bar_pba_offset + bar_pba_size > 4096) {
361a0ccd212SJason Wang         bar_size = bar_pba_offset + bar_pba_size;
362a0ccd212SJason Wang     }
363a0ccd212SJason Wang 
3649bff5d81SPeter Maydell     bar_size = pow2ceil(bar_size);
36553f94925SAlex Williamson 
3665f893b4eSGerd Hoffmann     name = g_strdup_printf("%s-msix", dev->name);
367a0ccd212SJason Wang     memory_region_init(&dev->msix_exclusive_bar, OBJECT(dev), name, bar_size);
3685f893b4eSGerd Hoffmann     g_free(name);
36953f94925SAlex Williamson 
37053f94925SAlex Williamson     ret = msix_init(dev, nentries, &dev->msix_exclusive_bar, bar_nr,
371a0ccd212SJason Wang                     0, &dev->msix_exclusive_bar,
372a0ccd212SJason Wang                     bar_nr, bar_pba_offset,
373ee640c62SCao jin                     0, errp);
37453f94925SAlex Williamson     if (ret) {
37553f94925SAlex Williamson         return ret;
37653f94925SAlex Williamson     }
37753f94925SAlex Williamson 
37853f94925SAlex Williamson     pci_register_bar(dev, bar_nr, PCI_BASE_ADDRESS_SPACE_MEMORY,
37953f94925SAlex Williamson                      &dev->msix_exclusive_bar);
38053f94925SAlex Williamson 
38153f94925SAlex Williamson     return 0;
38253f94925SAlex Williamson }
38353f94925SAlex Williamson 
38498304c84SMichael S. Tsirkin static void msix_free_irq_entries(PCIDevice *dev)
38598304c84SMichael S. Tsirkin {
38698304c84SMichael S. Tsirkin     int vector;
38798304c84SMichael S. Tsirkin 
38898304c84SMichael S. Tsirkin     for (vector = 0; vector < dev->msix_entries_nr; ++vector) {
38998304c84SMichael S. Tsirkin         dev->msix_entry_used[vector] = 0;
39098304c84SMichael S. Tsirkin         msix_clr_pending(dev, vector);
39198304c84SMichael S. Tsirkin     }
39298304c84SMichael S. Tsirkin }
39398304c84SMichael S. Tsirkin 
3943cac001eSMichael S. Tsirkin static void msix_clear_all_vectors(PCIDevice *dev)
3953cac001eSMichael S. Tsirkin {
3963cac001eSMichael S. Tsirkin     int vector;
3973cac001eSMichael S. Tsirkin 
3983cac001eSMichael S. Tsirkin     for (vector = 0; vector < dev->msix_entries_nr; ++vector) {
3993cac001eSMichael S. Tsirkin         msix_clr_pending(dev, vector);
4003cac001eSMichael S. Tsirkin     }
4013cac001eSMichael S. Tsirkin }
4023cac001eSMichael S. Tsirkin 
40302eb84d0SMichael S. Tsirkin /* Clean up resources for the device. */
404572992eeSAlex Williamson void msix_uninit(PCIDevice *dev, MemoryRegion *table_bar, MemoryRegion *pba_bar)
40502eb84d0SMichael S. Tsirkin {
40644701ab7SJan Kiszka     if (!msix_present(dev)) {
407572992eeSAlex Williamson         return;
40844701ab7SJan Kiszka     }
40902eb84d0SMichael S. Tsirkin     pci_del_capability(dev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH);
41002eb84d0SMichael S. Tsirkin     dev->msix_cap = 0;
41102eb84d0SMichael S. Tsirkin     msix_free_irq_entries(dev);
41202eb84d0SMichael S. Tsirkin     dev->msix_entries_nr = 0;
4135a2c2029SAlex Williamson     memory_region_del_subregion(pba_bar, &dev->msix_pba_mmio);
414d35e428cSAlex Williamson     g_free(dev->msix_pba);
415d35e428cSAlex Williamson     dev->msix_pba = NULL;
4165a2c2029SAlex Williamson     memory_region_del_subregion(table_bar, &dev->msix_table_mmio);
417d35e428cSAlex Williamson     g_free(dev->msix_table);
418d35e428cSAlex Williamson     dev->msix_table = NULL;
4197267c094SAnthony Liguori     g_free(dev->msix_entry_used);
42002eb84d0SMichael S. Tsirkin     dev->msix_entry_used = NULL;
42102eb84d0SMichael S. Tsirkin     dev->cap_present &= ~QEMU_PCI_CAP_MSIX;
42202eb84d0SMichael S. Tsirkin }
42302eb84d0SMichael S. Tsirkin 
42453f94925SAlex Williamson void msix_uninit_exclusive_bar(PCIDevice *dev)
42553f94925SAlex Williamson {
42653f94925SAlex Williamson     if (msix_present(dev)) {
4275a2c2029SAlex Williamson         msix_uninit(dev, &dev->msix_exclusive_bar, &dev->msix_exclusive_bar);
42853f94925SAlex Williamson     }
42953f94925SAlex Williamson }
43053f94925SAlex Williamson 
43102eb84d0SMichael S. Tsirkin void msix_save(PCIDevice *dev, QEMUFile *f)
43202eb84d0SMichael S. Tsirkin {
4339a3e12c8SMichael S. Tsirkin     unsigned n = dev->msix_entries_nr;
4349a3e12c8SMichael S. Tsirkin 
43544701ab7SJan Kiszka     if (!msix_present(dev)) {
4369a3e12c8SMichael S. Tsirkin         return;
43772755a70SMichael S. Tsirkin     }
4389a3e12c8SMichael S. Tsirkin 
439d35e428cSAlex Williamson     qemu_put_buffer(f, dev->msix_table, n * PCI_MSIX_ENTRY_SIZE);
4400ef1efcfSMarc-André Lureau     qemu_put_buffer(f, dev->msix_pba, DIV_ROUND_UP(n, 8));
44102eb84d0SMichael S. Tsirkin }
44202eb84d0SMichael S. Tsirkin 
44302eb84d0SMichael S. Tsirkin /* Should be called after restoring the config space. */
44402eb84d0SMichael S. Tsirkin void msix_load(PCIDevice *dev, QEMUFile *f)
44502eb84d0SMichael S. Tsirkin {
44602eb84d0SMichael S. Tsirkin     unsigned n = dev->msix_entries_nr;
4472cdfe53cSJan Kiszka     unsigned int vector;
44802eb84d0SMichael S. Tsirkin 
44944701ab7SJan Kiszka     if (!msix_present(dev)) {
45002eb84d0SMichael S. Tsirkin         return;
45198846d73SBlue Swirl     }
45202eb84d0SMichael S. Tsirkin 
4533cac001eSMichael S. Tsirkin     msix_clear_all_vectors(dev);
454d35e428cSAlex Williamson     qemu_get_buffer(f, dev->msix_table, n * PCI_MSIX_ENTRY_SIZE);
4550ef1efcfSMarc-André Lureau     qemu_get_buffer(f, dev->msix_pba, DIV_ROUND_UP(n, 8));
45650322249SMichael S. Tsirkin     msix_update_function_masked(dev);
4572cdfe53cSJan Kiszka 
4582cdfe53cSJan Kiszka     for (vector = 0; vector < n; vector++) {
4592cdfe53cSJan Kiszka         msix_handle_mask_update(dev, vector, true);
4602cdfe53cSJan Kiszka     }
46102eb84d0SMichael S. Tsirkin }
46202eb84d0SMichael S. Tsirkin 
46302eb84d0SMichael S. Tsirkin /* Does device support MSI-X? */
46402eb84d0SMichael S. Tsirkin int msix_present(PCIDevice *dev)
46502eb84d0SMichael S. Tsirkin {
46602eb84d0SMichael S. Tsirkin     return dev->cap_present & QEMU_PCI_CAP_MSIX;
46702eb84d0SMichael S. Tsirkin }
46802eb84d0SMichael S. Tsirkin 
46902eb84d0SMichael S. Tsirkin /* Is MSI-X enabled? */
47002eb84d0SMichael S. Tsirkin int msix_enabled(PCIDevice *dev)
47102eb84d0SMichael S. Tsirkin {
47202eb84d0SMichael S. Tsirkin     return (dev->cap_present & QEMU_PCI_CAP_MSIX) &&
4732760952bSMichael S. Tsirkin         (dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] &
47402eb84d0SMichael S. Tsirkin          MSIX_ENABLE_MASK);
47502eb84d0SMichael S. Tsirkin }
47602eb84d0SMichael S. Tsirkin 
47702eb84d0SMichael S. Tsirkin /* Send an MSI-X message */
47802eb84d0SMichael S. Tsirkin void msix_notify(PCIDevice *dev, unsigned vector)
47902eb84d0SMichael S. Tsirkin {
480bc4caf49SJan Kiszka     MSIMessage msg;
48102eb84d0SMichael S. Tsirkin 
48293482436SCao jin     if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector]) {
48302eb84d0SMichael S. Tsirkin         return;
48493482436SCao jin     }
48593482436SCao jin 
48602eb84d0SMichael S. Tsirkin     if (msix_is_masked(dev, vector)) {
48702eb84d0SMichael S. Tsirkin         msix_set_pending(dev, vector);
48802eb84d0SMichael S. Tsirkin         return;
48902eb84d0SMichael S. Tsirkin     }
49002eb84d0SMichael S. Tsirkin 
491bc4caf49SJan Kiszka     msg = msix_get_message(dev, vector);
492bc4caf49SJan Kiszka 
49338d40ff1SPavel Fedin     msi_send_message(dev, msg);
49402eb84d0SMichael S. Tsirkin }
49502eb84d0SMichael S. Tsirkin 
49602eb84d0SMichael S. Tsirkin void msix_reset(PCIDevice *dev)
49702eb84d0SMichael S. Tsirkin {
49844701ab7SJan Kiszka     if (!msix_present(dev)) {
49902eb84d0SMichael S. Tsirkin         return;
50044701ab7SJan Kiszka     }
5013cac001eSMichael S. Tsirkin     msix_clear_all_vectors(dev);
5022760952bSMichael S. Tsirkin     dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] &=
5032760952bSMichael S. Tsirkin             ~dev->wmask[dev->msix_cap + MSIX_CONTROL_OFFSET];
504d35e428cSAlex Williamson     memset(dev->msix_table, 0, dev->msix_entries_nr * PCI_MSIX_ENTRY_SIZE);
505d35e428cSAlex Williamson     memset(dev->msix_pba, 0, QEMU_ALIGN_UP(dev->msix_entries_nr, 64) / 8);
506ae1be0bbSMichael S. Tsirkin     msix_mask_all(dev, dev->msix_entries_nr);
50702eb84d0SMichael S. Tsirkin }
50802eb84d0SMichael S. Tsirkin 
/* PCI spec suggests that devices make it possible for software to configure
 * fewer vectors than supported by the device, but does not specify a standard
 * mechanism for devices to do so.
 *
 * We support this by asking devices to declare vectors software is going to
 * actually use, and checking this on the notification path. Devices that
 * don't want to follow the spec suggestion can declare all vectors as used. */
51602eb84d0SMichael S. Tsirkin 
51702eb84d0SMichael S. Tsirkin /* Mark vector as used. */
51802eb84d0SMichael S. Tsirkin int msix_vector_use(PCIDevice *dev, unsigned vector)
51902eb84d0SMichael S. Tsirkin {
52093482436SCao jin     if (vector >= dev->msix_entries_nr) {
52102eb84d0SMichael S. Tsirkin         return -EINVAL;
52293482436SCao jin     }
52393482436SCao jin 
52402eb84d0SMichael S. Tsirkin     dev->msix_entry_used[vector]++;
52502eb84d0SMichael S. Tsirkin     return 0;
52602eb84d0SMichael S. Tsirkin }
52702eb84d0SMichael S. Tsirkin 
52802eb84d0SMichael S. Tsirkin /* Mark vector as unused. */
52902eb84d0SMichael S. Tsirkin void msix_vector_unuse(PCIDevice *dev, unsigned vector)
53002eb84d0SMichael S. Tsirkin {
53198304c84SMichael S. Tsirkin     if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector]) {
53298304c84SMichael S. Tsirkin         return;
53398304c84SMichael S. Tsirkin     }
53498304c84SMichael S. Tsirkin     if (--dev->msix_entry_used[vector]) {
53598304c84SMichael S. Tsirkin         return;
53698304c84SMichael S. Tsirkin     }
53798304c84SMichael S. Tsirkin     msix_clr_pending(dev, vector);
53802eb84d0SMichael S. Tsirkin }
539b5f28bcaSMichael S. Tsirkin 
540b5f28bcaSMichael S. Tsirkin void msix_unuse_all_vectors(PCIDevice *dev)
541b5f28bcaSMichael S. Tsirkin {
54244701ab7SJan Kiszka     if (!msix_present(dev)) {
543b5f28bcaSMichael S. Tsirkin         return;
54444701ab7SJan Kiszka     }
545b5f28bcaSMichael S. Tsirkin     msix_free_irq_entries(dev);
546b5f28bcaSMichael S. Tsirkin }
5472cdfe53cSJan Kiszka 
548cb697aaaSJan Kiszka unsigned int msix_nr_vectors_allocated(const PCIDevice *dev)
549cb697aaaSJan Kiszka {
550cb697aaaSJan Kiszka     return dev->msix_entries_nr;
551cb697aaaSJan Kiszka }
552cb697aaaSJan Kiszka 
5532cdfe53cSJan Kiszka static int msix_set_notifier_for_vector(PCIDevice *dev, unsigned int vector)
5542cdfe53cSJan Kiszka {
5552cdfe53cSJan Kiszka     MSIMessage msg;
5562cdfe53cSJan Kiszka 
5572cdfe53cSJan Kiszka     if (msix_is_masked(dev, vector)) {
5582cdfe53cSJan Kiszka         return 0;
5592cdfe53cSJan Kiszka     }
5602cdfe53cSJan Kiszka     msg = msix_get_message(dev, vector);
5612cdfe53cSJan Kiszka     return dev->msix_vector_use_notifier(dev, vector, msg);
5622cdfe53cSJan Kiszka }
5632cdfe53cSJan Kiszka 
5642cdfe53cSJan Kiszka static void msix_unset_notifier_for_vector(PCIDevice *dev, unsigned int vector)
5652cdfe53cSJan Kiszka {
5662cdfe53cSJan Kiszka     if (msix_is_masked(dev, vector)) {
5672cdfe53cSJan Kiszka         return;
5682cdfe53cSJan Kiszka     }
5692cdfe53cSJan Kiszka     dev->msix_vector_release_notifier(dev, vector);
5702cdfe53cSJan Kiszka }
5712cdfe53cSJan Kiszka 
5722cdfe53cSJan Kiszka int msix_set_vector_notifiers(PCIDevice *dev,
5732cdfe53cSJan Kiszka                               MSIVectorUseNotifier use_notifier,
574bbef882cSMichael S. Tsirkin                               MSIVectorReleaseNotifier release_notifier,
575bbef882cSMichael S. Tsirkin                               MSIVectorPollNotifier poll_notifier)
5762cdfe53cSJan Kiszka {
5772cdfe53cSJan Kiszka     int vector, ret;
5782cdfe53cSJan Kiszka 
5792cdfe53cSJan Kiszka     assert(use_notifier && release_notifier);
5802cdfe53cSJan Kiszka 
5812cdfe53cSJan Kiszka     dev->msix_vector_use_notifier = use_notifier;
5822cdfe53cSJan Kiszka     dev->msix_vector_release_notifier = release_notifier;
583bbef882cSMichael S. Tsirkin     dev->msix_vector_poll_notifier = poll_notifier;
5842cdfe53cSJan Kiszka 
5852cdfe53cSJan Kiszka     if ((dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] &
5862cdfe53cSJan Kiszka         (MSIX_ENABLE_MASK | MSIX_MASKALL_MASK)) == MSIX_ENABLE_MASK) {
5872cdfe53cSJan Kiszka         for (vector = 0; vector < dev->msix_entries_nr; vector++) {
5882cdfe53cSJan Kiszka             ret = msix_set_notifier_for_vector(dev, vector);
5892cdfe53cSJan Kiszka             if (ret < 0) {
5902cdfe53cSJan Kiszka                 goto undo;
5912cdfe53cSJan Kiszka             }
5922cdfe53cSJan Kiszka         }
5932cdfe53cSJan Kiszka     }
594bbef882cSMichael S. Tsirkin     if (dev->msix_vector_poll_notifier) {
595bbef882cSMichael S. Tsirkin         dev->msix_vector_poll_notifier(dev, 0, dev->msix_entries_nr);
596bbef882cSMichael S. Tsirkin     }
5972cdfe53cSJan Kiszka     return 0;
5982cdfe53cSJan Kiszka 
5992cdfe53cSJan Kiszka undo:
6002cdfe53cSJan Kiszka     while (--vector >= 0) {
6012cdfe53cSJan Kiszka         msix_unset_notifier_for_vector(dev, vector);
6022cdfe53cSJan Kiszka     }
6032cdfe53cSJan Kiszka     dev->msix_vector_use_notifier = NULL;
6042cdfe53cSJan Kiszka     dev->msix_vector_release_notifier = NULL;
6052cdfe53cSJan Kiszka     return ret;
6062cdfe53cSJan Kiszka }
6072cdfe53cSJan Kiszka 
6082cdfe53cSJan Kiszka void msix_unset_vector_notifiers(PCIDevice *dev)
6092cdfe53cSJan Kiszka {
6102cdfe53cSJan Kiszka     int vector;
6112cdfe53cSJan Kiszka 
6122cdfe53cSJan Kiszka     assert(dev->msix_vector_use_notifier &&
6132cdfe53cSJan Kiszka            dev->msix_vector_release_notifier);
6142cdfe53cSJan Kiszka 
6152cdfe53cSJan Kiszka     if ((dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] &
6162cdfe53cSJan Kiszka         (MSIX_ENABLE_MASK | MSIX_MASKALL_MASK)) == MSIX_ENABLE_MASK) {
6172cdfe53cSJan Kiszka         for (vector = 0; vector < dev->msix_entries_nr; vector++) {
6182cdfe53cSJan Kiszka             msix_unset_notifier_for_vector(dev, vector);
6192cdfe53cSJan Kiszka         }
6202cdfe53cSJan Kiszka     }
6212cdfe53cSJan Kiszka     dev->msix_vector_use_notifier = NULL;
6222cdfe53cSJan Kiszka     dev->msix_vector_release_notifier = NULL;
623bbef882cSMichael S. Tsirkin     dev->msix_vector_poll_notifier = NULL;
6242cdfe53cSJan Kiszka }
625340b50c7SGerd Hoffmann 
6262c21ee76SJianjun Duan static int put_msix_state(QEMUFile *f, void *pv, size_t size,
62703fee66fSMarc-André Lureau                           const VMStateField *field, QJSON *vmdesc)
628340b50c7SGerd Hoffmann {
629340b50c7SGerd Hoffmann     msix_save(pv, f);
6302c21ee76SJianjun Duan 
6312c21ee76SJianjun Duan     return 0;
632340b50c7SGerd Hoffmann }
633340b50c7SGerd Hoffmann 
6342c21ee76SJianjun Duan static int get_msix_state(QEMUFile *f, void *pv, size_t size,
63503fee66fSMarc-André Lureau                           const VMStateField *field)
636340b50c7SGerd Hoffmann {
637340b50c7SGerd Hoffmann     msix_load(pv, f);
638340b50c7SGerd Hoffmann     return 0;
639340b50c7SGerd Hoffmann }
640340b50c7SGerd Hoffmann 
641340b50c7SGerd Hoffmann static VMStateInfo vmstate_info_msix = {
642340b50c7SGerd Hoffmann     .name = "msix state",
643340b50c7SGerd Hoffmann     .get  = get_msix_state,
644340b50c7SGerd Hoffmann     .put  = put_msix_state,
645340b50c7SGerd Hoffmann };
646340b50c7SGerd Hoffmann 
647340b50c7SGerd Hoffmann const VMStateDescription vmstate_msix = {
648340b50c7SGerd Hoffmann     .name = "msix",
649340b50c7SGerd Hoffmann     .fields = (VMStateField[]) {
650340b50c7SGerd Hoffmann         {
651340b50c7SGerd Hoffmann             .name         = "msix",
652340b50c7SGerd Hoffmann             .version_id   = 0,
653340b50c7SGerd Hoffmann             .field_exists = NULL,
654340b50c7SGerd Hoffmann             .size         = 0,   /* ouch */
655340b50c7SGerd Hoffmann             .info         = &vmstate_info_msix,
656340b50c7SGerd Hoffmann             .flags        = VMS_SINGLE,
657340b50c7SGerd Hoffmann             .offset       = 0,
658340b50c7SGerd Hoffmann         },
659340b50c7SGerd Hoffmann         VMSTATE_END_OF_LIST()
660340b50c7SGerd Hoffmann     }
661340b50c7SGerd Hoffmann };
662