xref: /qemu/hw/pci/msix.c (revision b69801dd6b1eb4d107f7c2f643adf0a4e3ec9124)
1 /*
2  * MSI-X device support
3  *
4  * This module includes support for MSI-X in pci devices.
5  *
6  * Author: Michael S. Tsirkin <mst@redhat.com>
7  *
8  *  Copyright (c) 2009, Red Hat Inc, Michael S. Tsirkin (mst@redhat.com)
9  *
10  * This work is licensed under the terms of the GNU GPL, version 2.  See
11  * the COPYING file in the top-level directory.
12  *
13  * Contributions after 2012-01-13 are licensed under the terms of the
14  * GNU GPL, version 2 or (at your option) any later version.
15  */
16 
17 #include "qemu/osdep.h"
18 #include "qemu/log.h"
19 #include "hw/pci/msi.h"
20 #include "hw/pci/msix.h"
21 #include "hw/pci/pci.h"
22 #include "hw/xen/xen.h"
23 #include "system/xen.h"
24 #include "migration/qemu-file-types.h"
25 #include "migration/vmstate.h"
26 #include "qemu/range.h"
27 #include "qapi/error.h"
28 #include "trace.h"
29 
30 #include "hw/i386/kvm/xen_evtchn.h"
31 
/*
 * The MSI-X enable bit and the mask-all bit both sit in byte 1 of the
 * FLAGS register, so they are accessed through that single config byte
 * with the 16-bit flag values shifted down by 8.
 */
#define MSIX_CONTROL_OFFSET (PCI_MSIX_FLAGS + 1)
#define MSIX_ENABLE_MASK (PCI_MSIX_FLAGS_ENABLE >> 8)
#define MSIX_MASKALL_MASK (PCI_MSIX_FLAGS_MASKALL >> 8)
36 
msix_prepare_message(PCIDevice * dev,unsigned vector)37 static MSIMessage msix_prepare_message(PCIDevice *dev, unsigned vector)
38 {
39     uint8_t *table_entry = dev->msix_table + vector * PCI_MSIX_ENTRY_SIZE;
40     MSIMessage msg;
41 
42     msg.address = pci_get_quad(table_entry + PCI_MSIX_ENTRY_LOWER_ADDR);
43     msg.data = pci_get_long(table_entry + PCI_MSIX_ENTRY_DATA);
44     return msg;
45 }
46 
msix_get_message(PCIDevice * dev,unsigned vector)47 MSIMessage msix_get_message(PCIDevice *dev, unsigned vector)
48 {
49     return dev->msix_prepare_message(dev, vector);
50 }
51 
52 /*
53  * Special API for POWER to configure the vectors through
54  * a side channel. Should never be used by devices.
55  */
msix_set_message(PCIDevice * dev,int vector,struct MSIMessage msg)56 void msix_set_message(PCIDevice *dev, int vector, struct MSIMessage msg)
57 {
58     uint8_t *table_entry = dev->msix_table + vector * PCI_MSIX_ENTRY_SIZE;
59 
60     pci_set_quad(table_entry + PCI_MSIX_ENTRY_LOWER_ADDR, msg.address);
61     pci_set_long(table_entry + PCI_MSIX_ENTRY_DATA, msg.data);
62     table_entry[PCI_MSIX_ENTRY_VECTOR_CTRL] &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT;
63 }
64 
/* Bit mask selecting @vector's bit inside its pending-bit-array byte. */
static uint8_t msix_pending_mask(int vector)
{
    const int bit = vector % 8;

    return 1 << bit;
}
69 
/* Address of the pending-bit-array byte that holds @vector's bit. */
static uint8_t *msix_pending_byte(PCIDevice *dev, int vector)
{
    return &dev->msix_pba[vector / 8];
}
74 
/* Non-zero when @vector's bit is set in the pending bit array. */
static int msix_is_pending(PCIDevice *dev, int vector)
{
    uint8_t pba_byte = *msix_pending_byte(dev, vector);

    return pba_byte & msix_pending_mask(vector);
}
79 
/* Set @vector's bit in the pending bit array. */
void msix_set_pending(PCIDevice *dev, unsigned int vector)
{
    uint8_t *pba_byte = msix_pending_byte(dev, vector);

    *pba_byte |= msix_pending_mask(vector);
}
84 
/* Clear @vector's bit in the pending bit array. */
void msix_clr_pending(PCIDevice *dev, int vector)
{
    uint8_t *pba_byte = msix_pending_byte(dev, vector);

    *pba_byte &= ~msix_pending_mask(vector);
}
89 
/*
 * Tell whether @vector counts as masked, given function-level mask state
 * @fmask: either @fmask is set or the entry's own mask bit is set in its
 * vector-control field.
 */
static bool msix_vector_masked(PCIDevice *dev, unsigned int vector, bool fmask)
{
    unsigned entry = vector * PCI_MSIX_ENTRY_SIZE;
    uint8_t *data = &dev->msix_table[entry + PCI_MSIX_ENTRY_DATA];

    /*
     * On Xen an MSI can be remapped into a pirq; masking and unmasking
     * then go through the PV evtchn path, so such a vector is never
     * reported as masked from here.
     */
    if (xen_enabled() && xen_is_pirq_msi(pci_get_long(data))) {
        return false;
    }

    if (fmask) {
        return true;
    }

    return dev->msix_table[entry + PCI_MSIX_ENTRY_VECTOR_CTRL] &
           PCI_MSIX_ENTRY_CTRL_MASKBIT;
}
102 
/* Is @vector masked under the device's current function-level mask state? */
bool msix_is_masked(PCIDevice *dev, unsigned int vector)
{
    bool function_masked = dev->msix_function_masked;

    return msix_vector_masked(dev, vector, function_masked);
}
107 
/*
 * Invoke the vector notifier matching @vector's new mask state.
 *
 * Does nothing when no use notifier is registered. A vector that is now
 * masked gets the release notifier; one that is now unmasked gets the
 * use notifier with its current message, and that call must not fail.
 */
static void msix_fire_vector_notifier(PCIDevice *dev,
                                      unsigned int vector, bool is_masked)
{
    if (!dev->msix_vector_use_notifier) {
        return;
    }

    if (!is_masked) {
        MSIMessage msg = msix_get_message(dev, vector);
        int ret = dev->msix_vector_use_notifier(dev, vector, msg);

        assert(ret >= 0);
        return;
    }

    dev->msix_vector_release_notifier(dev, vector);
}
125 
/*
 * React to a possible change of @vector's mask state; @was_masked is the
 * state before the change.
 *
 * In Xen emulation mode the current message and mask state are always
 * passed to xen_evtchn_snoop_msi(). If the effective mask state really
 * changed, the vector notifier is fired, and a vector that just became
 * unmasked while pending has its pending bit cleared and is delivered.
 */
static void msix_handle_mask_update(PCIDevice *dev, int vector, bool was_masked)
{
    const bool now_masked = msix_is_masked(dev, vector);

    if (xen_mode == XEN_EMULATE) {
        MSIMessage msg = msix_prepare_message(dev, vector);

        xen_evtchn_snoop_msi(dev, true, vector, msg.address, msg.data,
                             now_masked);
    }

    if (now_masked != was_masked) {
        msix_fire_vector_notifier(dev, vector, now_masked);

        if (!now_masked && msix_is_pending(dev, vector)) {
            msix_clr_pending(dev, vector);
            msix_notify(dev, vector);
        }
    }
}
148 
/*
 * Set (@mask true) or clear (@mask false) the per-vector mask bit of
 * @vector in the MSI-X table, then run the mask-update handling against
 * the state the vector had before the change.
 */
void msix_set_mask(PCIDevice *dev, int vector, bool mask)
{
    unsigned ctrl_off;
    bool was_masked;
    uint8_t *ctrl;

    assert(vector < dev->msix_entries_nr);

    ctrl_off = vector * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL;
    ctrl = &dev->msix_table[ctrl_off];

    /* Sample the old state before touching the control byte. */
    was_masked = msix_is_masked(dev, vector);

    if (!mask) {
        *ctrl &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT;
    } else {
        *ctrl |= PCI_MSIX_ENTRY_CTRL_MASKBIT;
    }

    msix_handle_mask_update(dev, vector, was_masked);
}
168 
/* Is the mask-all bit set in the MSI-X capability's control byte? */
static bool msix_masked(PCIDevice *dev)
{
    uint8_t ctrl = dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET];

    return ctrl & MSIX_MASKALL_MASK;
}
173 
/*
 * Recompute the cached function-level mask: the function counts as
 * masked when MSI-X is not enabled or when the mask-all bit is set.
 */
static void msix_update_function_masked(PCIDevice *dev)
{
    bool masked = true;

    if (msix_enabled(dev)) {
        masked = msix_masked(dev);
    }

    dev->msix_function_masked = masked;
}
178 
179 /* Handle MSI-X capability config write. */
msix_write_config(PCIDevice * dev,uint32_t addr,uint32_t val,int len)180 void msix_write_config(PCIDevice *dev, uint32_t addr,
181                        uint32_t val, int len)
182 {
183     unsigned enable_pos = dev->msix_cap + MSIX_CONTROL_OFFSET;
184     int vector;
185     bool was_masked;
186 
187     if (!msix_present(dev) || !range_covers_byte(addr, len, enable_pos)) {
188         return;
189     }
190 
191     trace_msix_write_config(dev->name, msix_enabled(dev), msix_masked(dev));
192 
193     was_masked = dev->msix_function_masked;
194     msix_update_function_masked(dev);
195 
196     if (!msix_enabled(dev)) {
197         return;
198     }
199 
200     pci_device_deassert_intx(dev);
201 
202     if (dev->msix_function_masked == was_masked) {
203         return;
204     }
205 
206     for (vector = 0; vector < dev->msix_entries_nr; ++vector) {
207         msix_handle_mask_update(dev, vector,
208                                 msix_vector_masked(dev, vector, was_masked));
209     }
210 }
211 
/*
 * MMIO read handler for the MSI-X table: return the value pci_get_long()
 * reads at byte offset @addr of the table. The access must lie entirely
 * inside the table.
 */
static uint64_t msix_table_mmio_read(void *opaque, hwaddr addr,
                                     unsigned size)
{
    PCIDevice *dev = opaque;
    uint8_t *src;

    assert(addr + size <= dev->msix_entries_nr * PCI_MSIX_ENTRY_SIZE);

    src = dev->msix_table + addr;
    return pci_get_long(src);
}
220 
/*
 * MMIO write handler for the MSI-X table: store @val with pci_set_long()
 * at byte offset @addr, then run the mask-update handling for the vector
 * whose entry was written, comparing against its mask state beforehand.
 */
static void msix_table_mmio_write(void *opaque, hwaddr addr,
                                  uint64_t val, unsigned size)
{
    PCIDevice *dev = opaque;
    int entry = addr / PCI_MSIX_ENTRY_SIZE;
    bool masked_before;

    assert(addr + size <= dev->msix_entries_nr * PCI_MSIX_ENTRY_SIZE);

    masked_before = msix_is_masked(dev, entry);
    pci_set_long(dev->msix_table + addr, val);
    msix_handle_mask_update(dev, entry, masked_before);
}
234 
235 static const MemoryRegionOps msix_table_mmio_ops = {
236     .read = msix_table_mmio_read,
237     .write = msix_table_mmio_write,
238     .endianness = DEVICE_LITTLE_ENDIAN,
239     .valid = {
240         .min_access_size = 4,
241         .max_access_size = 8,
242     },
243     .impl = {
244         .max_access_size = 4,
245     },
246 };
247 
/*
 * MMIO read handler for the pending bit array.
 *
 * If a poll notifier is registered it is first called for the vectors
 * covered by the access, with the end bound clamped to the number of
 * table entries. The result is the value pci_get_long() reads at byte
 * offset @addr of the PBA.
 */
static uint64_t msix_pba_mmio_read(void *opaque, hwaddr addr,
                                   unsigned size)
{
    PCIDevice *dev = opaque;

    if (dev->msix_vector_poll_notifier) {
        /* Eight pending bits per PBA byte. */
        unsigned first = addr * 8;
        unsigned last = MIN((addr + size) * 8, dev->msix_entries_nr);

        dev->msix_vector_poll_notifier(dev, first, last);
    }

    return pci_get_long(dev->msix_pba + addr);
}
260 
/*
 * MMIO write handler for the pending bit array: the write is dropped and
 * only reported as a guest error, identifying the device and offset.
 */
static void msix_pba_mmio_write(void *opaque, hwaddr addr,
                                uint64_t val, unsigned size)
{
    PCIDevice *dev = opaque;
    const char *root_path = pci_root_bus_path(dev);

    qemu_log_mask(LOG_GUEST_ERROR,
                  "PCI [%s:%02x:%02x.%x] attempt to write to MSI-X "
                  "PBA at 0x%" FMT_PCIBUS ", ignoring.\n",
                  root_path, pci_dev_bus_num(dev),
                  PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
                  addr);
}
273 
274 static const MemoryRegionOps msix_pba_mmio_ops = {
275     .read = msix_pba_mmio_read,
276     .write = msix_pba_mmio_write,
277     .endianness = DEVICE_LITTLE_ENDIAN,
278     .valid = {
279         .min_access_size = 4,
280         .max_access_size = 8,
281     },
282     .impl = {
283         .max_access_size = 4,
284     },
285 };
286 
msix_mask_all(struct PCIDevice * dev,unsigned nentries)287 static void msix_mask_all(struct PCIDevice *dev, unsigned nentries)
288 {
289     int vector;
290 
291     for (vector = 0; vector < nentries; ++vector) {
292         unsigned offset =
293             vector * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL;
294         bool was_masked = msix_is_masked(dev, vector);
295 
296         dev->msix_table[offset] |= PCI_MSIX_ENTRY_CTRL_MASKBIT;
297         msix_handle_mask_update(dev, vector, was_masked);
298     }
299 }
300 
301 /*
302  * Make PCI device @dev MSI-X capable
303  * @nentries is the max number of MSI-X vectors that the device support.
304  * @table_bar is the MemoryRegion that MSI-X table structure resides.
305  * @table_bar_nr is number of base address register corresponding to @table_bar.
306  * @table_offset indicates the offset that the MSI-X table structure starts with
307  * in @table_bar.
308  * @pba_bar is the MemoryRegion that the Pending Bit Array structure resides.
309  * @pba_bar_nr is number of base address register corresponding to @pba_bar.
310  * @pba_offset indicates the offset that the Pending Bit Array structure
311  * starts with in @pba_bar.
312  * Non-zero @cap_pos puts capability MSI-X at that offset in PCI config space.
313  * @errp is for returning errors.
314  *
315  * Return 0 on success; set @errp and return -errno on error:
316  * -ENOTSUP means lacking msi support for a msi-capable platform.
317  * -EINVAL means capability overlap, happens when @cap_pos is non-zero,
318  * also means a programming error, except device assignment, which can check
319  * if a real HW is broken.
320  */
msix_init(struct PCIDevice * dev,unsigned short nentries,MemoryRegion * table_bar,uint8_t table_bar_nr,unsigned table_offset,MemoryRegion * pba_bar,uint8_t pba_bar_nr,unsigned pba_offset,uint8_t cap_pos,Error ** errp)321 int msix_init(struct PCIDevice *dev, unsigned short nentries,
322               MemoryRegion *table_bar, uint8_t table_bar_nr,
323               unsigned table_offset, MemoryRegion *pba_bar,
324               uint8_t pba_bar_nr, unsigned pba_offset, uint8_t cap_pos,
325               Error **errp)
326 {
327     int cap;
328     unsigned table_size, pba_size;
329     uint8_t *config;
330 
331     /* Nothing to do if MSI is not supported by interrupt controller */
332     if (!msi_nonbroken) {
333         error_setg(errp, "MSI-X is not supported by interrupt controller");
334         return -ENOTSUP;
335     }
336 
337     if (nentries < 1 || nentries > PCI_MSIX_FLAGS_QSIZE + 1) {
338         error_setg(errp, "The number of MSI-X vectors is invalid");
339         return -EINVAL;
340     }
341 
342     table_size = nentries * PCI_MSIX_ENTRY_SIZE;
343     pba_size = QEMU_ALIGN_UP(nentries, 64) / 8;
344 
345     /* Sanity test: table & pba don't overlap, fit within BARs, min aligned */
346     if ((table_bar_nr == pba_bar_nr &&
347          ranges_overlap(table_offset, table_size, pba_offset, pba_size)) ||
348         table_offset + table_size > memory_region_size(table_bar) ||
349         pba_offset + pba_size > memory_region_size(pba_bar) ||
350         (table_offset | pba_offset) & PCI_MSIX_FLAGS_BIRMASK) {
351         error_setg(errp, "table & pba overlap, or they don't fit in BARs,"
352                    " or don't align");
353         return -EINVAL;
354     }
355 
356     cap = pci_add_capability(dev, PCI_CAP_ID_MSIX,
357                               cap_pos, MSIX_CAP_LENGTH, errp);
358     if (cap < 0) {
359         return cap;
360     }
361 
362     dev->msix_cap = cap;
363     dev->cap_present |= QEMU_PCI_CAP_MSIX;
364     config = dev->config + cap;
365 
366     pci_set_word(config + PCI_MSIX_FLAGS, nentries - 1);
367     dev->msix_entries_nr = nentries;
368     dev->msix_function_masked = true;
369 
370     pci_set_long(config + PCI_MSIX_TABLE, table_offset | table_bar_nr);
371     pci_set_long(config + PCI_MSIX_PBA, pba_offset | pba_bar_nr);
372 
373     /* Make flags bit writable. */
374     dev->wmask[cap + MSIX_CONTROL_OFFSET] |= MSIX_ENABLE_MASK |
375                                              MSIX_MASKALL_MASK;
376 
377     dev->msix_table = g_malloc0(table_size);
378     dev->msix_pba = g_malloc0(pba_size);
379     dev->msix_entry_used = g_malloc0(nentries * sizeof *dev->msix_entry_used);
380 
381     msix_mask_all(dev, nentries);
382 
383     memory_region_init_io(&dev->msix_table_mmio, OBJECT(dev), &msix_table_mmio_ops, dev,
384                           "msix-table", table_size);
385     memory_region_add_subregion(table_bar, table_offset, &dev->msix_table_mmio);
386     memory_region_init_io(&dev->msix_pba_mmio, OBJECT(dev), &msix_pba_mmio_ops, dev,
387                           "msix-pba", pba_size);
388     memory_region_add_subregion(pba_bar, pba_offset, &dev->msix_pba_mmio);
389 
390     dev->msix_prepare_message = msix_prepare_message;
391 
392     return 0;
393 }
394 
/*
 * Set up MSI-X for @dev in a BAR of its own (BAR number @bar_nr) holding
 * only the vector table, at offset 0, and the PBA behind it.
 *
 * Returns 0 on success or the error returned by msix_init(), in which
 * case the BAR is not registered.
 */
int msix_init_exclusive_bar(PCIDevice *dev, unsigned short nentries,
                            uint8_t bar_nr, Error **errp)
{
    uint32_t bar_size = 4096;
    uint32_t pba_offset = bar_size / 2;
    uint32_t pba_size = QEMU_ALIGN_UP(nentries, 64) / 8;
    uint32_t table_size = nentries * PCI_MSIX_ENTRY_SIZE;
    char *name;
    int ret;

    /*
     * For migration compatibility the layout stays a 4k BAR with the
     * vector table in the lower half and the PBA in the upper half
     * whenever the table fits into the lower half. Only a larger table
     * pushes the PBA further out, right behind the table.
     */
    if (table_size > pba_offset) {
        pba_offset = table_size;
    }

    /* Grow the BAR if table plus PBA no longer fit into 4k. */
    if (pba_offset + pba_size > bar_size) {
        bar_size = pba_offset + pba_size;
    }

    bar_size = pow2ceil(bar_size);

    name = g_strdup_printf("%s-msix", dev->name);
    memory_region_init(&dev->msix_exclusive_bar, OBJECT(dev), name, bar_size);
    g_free(name);

    ret = msix_init(dev, nentries,
                    &dev->msix_exclusive_bar, bar_nr, 0,
                    &dev->msix_exclusive_bar, bar_nr, pba_offset,
                    0, errp);
    if (ret) {
        return ret;
    }

    pci_register_bar(dev, bar_nr, PCI_BASE_ADDRESS_SPACE_MEMORY,
                     &dev->msix_exclusive_bar);

    return 0;
}
438 
/* Reset the use count of every vector to zero and clear its pending bit. */
static void msix_free_irq_entries(PCIDevice *dev)
{
    int i;

    for (i = 0; i < dev->msix_entries_nr; i++) {
        dev->msix_entry_used[i] = 0;
        msix_clr_pending(dev, i);
    }
}
448 
/* Clear the pending bit of every vector; use counts are left alone. */
static void msix_clear_all_vectors(PCIDevice *dev)
{
    int i;

    for (i = 0; i < dev->msix_entries_nr; i++) {
        msix_clr_pending(dev, i);
    }
}
457 
458 /* Clean up resources for the device. */
msix_uninit(PCIDevice * dev,MemoryRegion * table_bar,MemoryRegion * pba_bar)459 void msix_uninit(PCIDevice *dev, MemoryRegion *table_bar, MemoryRegion *pba_bar)
460 {
461     if (!msix_present(dev)) {
462         return;
463     }
464     pci_del_capability(dev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH);
465     dev->msix_cap = 0;
466     msix_free_irq_entries(dev);
467     dev->msix_entries_nr = 0;
468     memory_region_del_subregion(pba_bar, &dev->msix_pba_mmio);
469     g_free(dev->msix_pba);
470     dev->msix_pba = NULL;
471     memory_region_del_subregion(table_bar, &dev->msix_table_mmio);
472     g_free(dev->msix_table);
473     dev->msix_table = NULL;
474     g_free(dev->msix_entry_used);
475     dev->msix_entry_used = NULL;
476     dev->cap_present &= ~QEMU_PCI_CAP_MSIX;
477     dev->msix_prepare_message = NULL;
478 }
479 
/*
 * Tear down MSI-X for a device whose table and PBA both live in its
 * exclusive MSI-X BAR. Does nothing if MSI-X is not present.
 */
void msix_uninit_exclusive_bar(PCIDevice *dev)
{
    MemoryRegion *bar = &dev->msix_exclusive_bar;

    if (!msix_present(dev)) {
        return;
    }

    msix_uninit(dev, bar, bar);
}
486 
/*
 * Write the MSI-X table followed by the pending bits (one bit per
 * vector, rounded up to whole bytes) to @f. Nothing is written for a
 * device without MSI-X.
 */
void msix_save(PCIDevice *dev, QEMUFile *f)
{
    unsigned nr_vectors;

    if (!msix_present(dev)) {
        return;
    }

    nr_vectors = dev->msix_entries_nr;

    qemu_put_buffer(f, dev->msix_table, nr_vectors * PCI_MSIX_ENTRY_SIZE);
    qemu_put_buffer(f, dev->msix_pba, DIV_ROUND_UP(nr_vectors, 8));
}
498 
499 /* Should be called after restoring the config space. */
msix_load(PCIDevice * dev,QEMUFile * f)500 void msix_load(PCIDevice *dev, QEMUFile *f)
501 {
502     unsigned n = dev->msix_entries_nr;
503     unsigned int vector;
504 
505     if (!msix_present(dev)) {
506         return;
507     }
508 
509     msix_clear_all_vectors(dev);
510     qemu_get_buffer(f, dev->msix_table, n * PCI_MSIX_ENTRY_SIZE);
511     qemu_get_buffer(f, dev->msix_pba, DIV_ROUND_UP(n, 8));
512     msix_update_function_masked(dev);
513 
514     for (vector = 0; vector < n; vector++) {
515         msix_handle_mask_update(dev, vector, true);
516     }
517 }
518 
519 /* Does device support MSI-X? */
msix_present(PCIDevice * dev)520 int msix_present(PCIDevice *dev)
521 {
522     return dev->cap_present & QEMU_PCI_CAP_MSIX;
523 }
524 
525 /* Is MSI-X enabled? */
msix_enabled(PCIDevice * dev)526 int msix_enabled(PCIDevice *dev)
527 {
528     return (dev->cap_present & QEMU_PCI_CAP_MSIX) &&
529         (dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] &
530          MSIX_ENABLE_MASK);
531 }
532 
533 /* Send an MSI-X message */
msix_notify(PCIDevice * dev,unsigned vector)534 void msix_notify(PCIDevice *dev, unsigned vector)
535 {
536     MSIMessage msg;
537 
538     assert(vector < dev->msix_entries_nr);
539 
540     if (!dev->msix_entry_used[vector]) {
541         return;
542     }
543 
544     if (msix_is_masked(dev, vector)) {
545         msix_set_pending(dev, vector);
546         return;
547     }
548 
549     msg = msix_get_message(dev, vector);
550 
551     msi_send_message(dev, msg);
552 }
553 
/*
 * Reset the MSI-X state of @dev: clear all pending bits, clear the
 * guest-writable bits of the control byte, zero the table and the PBA,
 * and finally mask every vector again. Does nothing without MSI-X.
 */
void msix_reset(PCIDevice *dev)
{
    unsigned ctrl_pos;

    if (!msix_present(dev)) {
        return;
    }

    ctrl_pos = dev->msix_cap + MSIX_CONTROL_OFFSET;

    msix_clear_all_vectors(dev);
    dev->config[ctrl_pos] &= ~dev->wmask[ctrl_pos];
    memset(dev->msix_table, 0, dev->msix_entries_nr * PCI_MSIX_ENTRY_SIZE);
    memset(dev->msix_pba, 0, QEMU_ALIGN_UP(dev->msix_entries_nr, 64) / 8);
    msix_mask_all(dev, dev->msix_entries_nr);
}
566 
567 /* PCI spec suggests that devices make it possible for software to configure
568  * less vectors than supported by the device, but does not specify a standard
569  * mechanism for devices to do so.
570  *
571  * We support this by asking devices to declare vectors software is going to
572  * actually use, and checking this on the notification path. Devices that
573  * don't want to follow the spec suggestion can declare all vectors as used. */
574 
575 /* Mark vector as used. */
msix_vector_use(PCIDevice * dev,unsigned vector)576 void msix_vector_use(PCIDevice *dev, unsigned vector)
577 {
578     assert(vector < dev->msix_entries_nr);
579     dev->msix_entry_used[vector]++;
580 }
581 
582 /* Mark vector as unused. */
msix_vector_unuse(PCIDevice * dev,unsigned vector)583 void msix_vector_unuse(PCIDevice *dev, unsigned vector)
584 {
585     assert(vector < dev->msix_entries_nr);
586     if (!dev->msix_entry_used[vector]) {
587         return;
588     }
589     if (--dev->msix_entry_used[vector]) {
590         return;
591     }
592     msix_clr_pending(dev, vector);
593 }
594 
/*
 * Drop every vector's use count and pending bit in one go. Does nothing
 * for a device without MSI-X.
 */
void msix_unuse_all_vectors(PCIDevice *dev)
{
    if (msix_present(dev)) {
        msix_free_irq_entries(dev);
    }
}
602 
msix_nr_vectors_allocated(const PCIDevice * dev)603 unsigned int msix_nr_vectors_allocated(const PCIDevice *dev)
604 {
605     return dev->msix_entries_nr;
606 }
607 
/*
 * Call the use notifier for @vector with its current message, unless
 * the vector is masked. Returns 0 for a masked vector, otherwise the
 * notifier's return value.
 */
static int msix_set_notifier_for_vector(PCIDevice *dev, unsigned int vector)
{
    if (!msix_is_masked(dev, vector)) {
        MSIMessage msg = msix_get_message(dev, vector);

        return dev->msix_vector_use_notifier(dev, vector, msg);
    }

    return 0;
}
618 
/* Call the release notifier for @vector, unless the vector is masked. */
static void msix_unset_notifier_for_vector(PCIDevice *dev, unsigned int vector)
{
    if (!msix_is_masked(dev, vector)) {
        dev->msix_vector_release_notifier(dev, vector);
    }
}
626 
/*
 * Register the vector notifiers of @dev. @use_notifier and
 * @release_notifier are mandatory; @poll_notifier may be NULL.
 *
 * If MSI-X is currently enabled and not masked at function level, the
 * use notifier is invoked right away for every unmasked vector. Should
 * one of those calls fail, the vectors handled so far are released
 * again, all three notifiers are unregistered and the negative error is
 * returned. On success the poll notifier, if any, is run once over all
 * vectors and 0 is returned.
 */
int msix_set_vector_notifiers(PCIDevice *dev,
                              MSIVectorUseNotifier use_notifier,
                              MSIVectorReleaseNotifier release_notifier,
                              MSIVectorPollNotifier poll_notifier)
{
    uint8_t ctrl;
    int vector, ret;

    assert(use_notifier && release_notifier);

    dev->msix_vector_use_notifier = use_notifier;
    dev->msix_vector_release_notifier = release_notifier;
    dev->msix_vector_poll_notifier = poll_notifier;

    ctrl = dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET];

    /* Enable bit set and mask-all bit clear? */
    if ((ctrl & (MSIX_ENABLE_MASK | MSIX_MASKALL_MASK)) == MSIX_ENABLE_MASK) {
        for (vector = 0; vector < dev->msix_entries_nr; vector++) {
            ret = msix_set_notifier_for_vector(dev, vector);
            if (ret >= 0) {
                continue;
            }

            /* Roll back the vectors set up before the failing one. */
            while (--vector >= 0) {
                msix_unset_notifier_for_vector(dev, vector);
            }
            dev->msix_vector_use_notifier = NULL;
            dev->msix_vector_release_notifier = NULL;
            dev->msix_vector_poll_notifier = NULL;
            return ret;
        }
    }

    if (dev->msix_vector_poll_notifier) {
        dev->msix_vector_poll_notifier(dev, 0, dev->msix_entries_nr);
    }

    return 0;
}
663 
/*
 * Unregister the vector notifiers of @dev; use and release notifiers
 * must currently be registered.
 *
 * If MSI-X is enabled and not masked at function level, the release
 * notifier is first invoked for every unmasked vector.
 */
void msix_unset_vector_notifiers(PCIDevice *dev)
{
    uint8_t ctrl;
    int i;

    assert(dev->msix_vector_use_notifier &&
           dev->msix_vector_release_notifier);

    ctrl = dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET];

    /* Enable bit set and mask-all bit clear? */
    if ((ctrl & (MSIX_ENABLE_MASK | MSIX_MASKALL_MASK)) == MSIX_ENABLE_MASK) {
        for (i = 0; i < dev->msix_entries_nr; i++) {
            msix_unset_notifier_for_vector(dev, i);
        }
    }

    dev->msix_vector_use_notifier = NULL;
    dev->msix_vector_release_notifier = NULL;
    dev->msix_vector_poll_notifier = NULL;
}
681 
/*
 * VMState "put" callback: @pv is handed to msix_save() as the PCI
 * device. @size, @field and @vmdesc are not used. Always returns 0.
 */
static int put_msix_state(QEMUFile *f, void *pv, size_t size,
                          const VMStateField *field, JSONWriter *vmdesc)
{
    PCIDevice *dev = pv;

    msix_save(dev, f);
    return 0;
}
689 
/*
 * VMState "get" callback: @pv is handed to msix_load() as the PCI
 * device. @size and @field are not used. Always returns 0.
 */
static int get_msix_state(QEMUFile *f, void *pv, size_t size,
                          const VMStateField *field)
{
    PCIDevice *dev = pv;

    msix_load(dev, f);
    return 0;
}
696 
697 static const VMStateInfo vmstate_info_msix = {
698     .name = "msix state",
699     .get  = get_msix_state,
700     .put  = put_msix_state,
701 };
702 
703 const VMStateDescription vmstate_msix = {
704     .name = "msix",
705     .fields = (const VMStateField[]) {
706         {
707             .name         = "msix",
708             .version_id   = 0,
709             .field_exists = NULL,
710             .size         = 0,   /* ouch */
711             .info         = &vmstate_info_msix,
712             .flags        = VMS_SINGLE,
713             .offset       = 0,
714         },
715         VMSTATE_END_OF_LIST()
716     }
717 };
718