xref: /qemu/hw/remote/vfio-user-obj.c (revision 08cf3dc611991e9697f62458107e13f2c582869a)
/**
 * QEMU vfio-user server object
 *
 * Copyright © 2022 Oracle and/or its affiliates.
 *
 * This work is licensed under the terms of the GNU GPL-v2, version 2 or later.
 *
 * See the COPYING file in the top-level directory.
 *
 */
118f9a9259SJagannathan Raman 
/**
 * Usage: add options:
 *     -machine x-remote,vfio-user=on,auto-shutdown=on
 *     -device <PCI-device>,id=<pci-dev-id>
 *     -object x-vfio-user-server,id=<id>,type=unix,path=<socket-path>,
 *             device=<pci-dev-id>
 *
 * Note that the x-vfio-user-server object must be used with the x-remote
 * machine only. This server supports only PCI devices for now.
 *
 * type - SocketAddress type - presently only "unix" is supported. Required
 *        option
 *
 * path - named unix socket; it will be created by the server. It is
 *        a required option
 *
 * device - id of a device on the server, a required option. Only PCI
 *          devices are supported presently.
 *
 * notes - x-vfio-user-server could block IO and the monitor during the
 *         initialization phase.
 */
348f9a9259SJagannathan Raman 
358f9a9259SJagannathan Raman #include "qemu/osdep.h"
368f9a9259SJagannathan Raman 
378f9a9259SJagannathan Raman #include "qom/object.h"
388f9a9259SJagannathan Raman #include "qom/object_interfaces.h"
398f9a9259SJagannathan Raman #include "qemu/error-report.h"
408f9a9259SJagannathan Raman #include "trace.h"
418f9a9259SJagannathan Raman #include "sysemu/runstate.h"
428f9a9259SJagannathan Raman #include "hw/boards.h"
438f9a9259SJagannathan Raman #include "hw/remote/machine.h"
448f9a9259SJagannathan Raman #include "qapi/error.h"
458f9a9259SJagannathan Raman #include "qapi/qapi-visit-sockets.h"
469fb3fba1SJagannathan Raman #include "qapi/qapi-events-misc.h"
4787f7249fSJagannathan Raman #include "qemu/notify.h"
489fb3fba1SJagannathan Raman #include "qemu/thread.h"
4990072f29SJagannathan Raman #include "qemu/main-loop.h"
5087f7249fSJagannathan Raman #include "sysemu/sysemu.h"
5187f7249fSJagannathan Raman #include "libvfio-user.h"
52a6e8d6d9SJagannathan Raman #include "hw/qdev-core.h"
53a6e8d6d9SJagannathan Raman #include "hw/pci/pci.h"
549fb3fba1SJagannathan Raman #include "qemu/timer.h"
553123f93dSJagannathan Raman #include "exec/memory.h"
56*08cf3dc6SJagannathan Raman #include "hw/pci/msi.h"
57*08cf3dc6SJagannathan Raman #include "hw/pci/msix.h"
58*08cf3dc6SJagannathan Raman #include "hw/remote/vfio-user-obj.h"
598f9a9259SJagannathan Raman 
608f9a9259SJagannathan Raman #define TYPE_VFU_OBJECT "x-vfio-user-server"
618f9a9259SJagannathan Raman OBJECT_DECLARE_TYPE(VfuObject, VfuObjectClass, VFU_OBJECT)
628f9a9259SJagannathan Raman 
/**
 * VFU_OBJECT_ERROR - reports an error message. If auto_shutdown
 * is set, it aborts the machine on error. Otherwise, it logs an
 * error message without aborting.
 *
 * @o:   the VfuObject the error relates to (currently not evaluated)
 * @fmt: printf-style format string, followed by its arguments
 *
 * The expansion is wrapped in do { } while (0) so that the macro behaves
 * as exactly one statement when followed by a semicolon.
 *
 * errno is saved before, and restored after, the auto-shutdown query:
 * callers pass strerror(errno) as an argument, and that argument is only
 * evaluated after vfu_object_auto_shutdown() has run.
 */
#define VFU_OBJECT_ERROR(o, fmt, ...)                                     \
    do {                                                                  \
        int vfu_saved_errno_ = errno;                                     \
        bool vfu_abort_ = vfu_object_auto_shutdown();                     \
        errno = vfu_saved_errno_;                                         \
        if (vfu_abort_) {                                                 \
            error_setg(&error_abort, (fmt), ## __VA_ARGS__);              \
        } else {                                                          \
            error_report((fmt), ## __VA_ARGS__);                          \
        }                                                                 \
    } while (0)
768f9a9259SJagannathan Raman 
778f9a9259SJagannathan Raman struct VfuObjectClass {
788f9a9259SJagannathan Raman     ObjectClass parent_class;
798f9a9259SJagannathan Raman 
808f9a9259SJagannathan Raman     unsigned int nr_devs;
818f9a9259SJagannathan Raman };
828f9a9259SJagannathan Raman 
838f9a9259SJagannathan Raman struct VfuObject {
848f9a9259SJagannathan Raman     /* private */
858f9a9259SJagannathan Raman     Object parent;
868f9a9259SJagannathan Raman 
878f9a9259SJagannathan Raman     SocketAddress *socket;
888f9a9259SJagannathan Raman 
898f9a9259SJagannathan Raman     char *device;
908f9a9259SJagannathan Raman 
918f9a9259SJagannathan Raman     Error *err;
9287f7249fSJagannathan Raman 
9387f7249fSJagannathan Raman     Notifier machine_done;
9487f7249fSJagannathan Raman 
9587f7249fSJagannathan Raman     vfu_ctx_t *vfu_ctx;
96a6e8d6d9SJagannathan Raman 
97a6e8d6d9SJagannathan Raman     PCIDevice *pci_dev;
98a6e8d6d9SJagannathan Raman 
99a6e8d6d9SJagannathan Raman     Error *unplug_blocker;
1009fb3fba1SJagannathan Raman 
1019fb3fba1SJagannathan Raman     int vfu_poll_fd;
102*08cf3dc6SJagannathan Raman 
103*08cf3dc6SJagannathan Raman     MSITriggerFunc *default_msi_trigger;
104*08cf3dc6SJagannathan Raman     MSIPrepareMessageFunc *default_msi_prepare_message;
105*08cf3dc6SJagannathan Raman     MSIxPrepareMessageFunc *default_msix_prepare_message;
1068f9a9259SJagannathan Raman };
1078f9a9259SJagannathan Raman 
10887f7249fSJagannathan Raman static void vfu_object_init_ctx(VfuObject *o, Error **errp);
10987f7249fSJagannathan Raman 
1108f9a9259SJagannathan Raman static bool vfu_object_auto_shutdown(void)
1118f9a9259SJagannathan Raman {
1128f9a9259SJagannathan Raman     bool auto_shutdown = true;
1138f9a9259SJagannathan Raman     Error *local_err = NULL;
1148f9a9259SJagannathan Raman 
1158f9a9259SJagannathan Raman     if (!current_machine) {
1168f9a9259SJagannathan Raman         return auto_shutdown;
1178f9a9259SJagannathan Raman     }
1188f9a9259SJagannathan Raman 
1198f9a9259SJagannathan Raman     auto_shutdown = object_property_get_bool(OBJECT(current_machine),
1208f9a9259SJagannathan Raman                                              "auto-shutdown",
1218f9a9259SJagannathan Raman                                              &local_err);
1228f9a9259SJagannathan Raman 
1238f9a9259SJagannathan Raman     /*
1248f9a9259SJagannathan Raman      * local_err would be set if no such property exists - safe to ignore.
1258f9a9259SJagannathan Raman      * Unlikely scenario as auto-shutdown is always defined for
1268f9a9259SJagannathan Raman      * TYPE_REMOTE_MACHINE, and  TYPE_VFU_OBJECT only works with
1278f9a9259SJagannathan Raman      * TYPE_REMOTE_MACHINE
1288f9a9259SJagannathan Raman      */
1298f9a9259SJagannathan Raman     if (local_err) {
1308f9a9259SJagannathan Raman         auto_shutdown = true;
1318f9a9259SJagannathan Raman         error_free(local_err);
1328f9a9259SJagannathan Raman     }
1338f9a9259SJagannathan Raman 
1348f9a9259SJagannathan Raman     return auto_shutdown;
1358f9a9259SJagannathan Raman }
1368f9a9259SJagannathan Raman 
1378f9a9259SJagannathan Raman static void vfu_object_set_socket(Object *obj, Visitor *v, const char *name,
1388f9a9259SJagannathan Raman                                   void *opaque, Error **errp)
1398f9a9259SJagannathan Raman {
1408f9a9259SJagannathan Raman     VfuObject *o = VFU_OBJECT(obj);
1418f9a9259SJagannathan Raman 
14287f7249fSJagannathan Raman     if (o->vfu_ctx) {
14387f7249fSJagannathan Raman         error_setg(errp, "vfu: Unable to set socket property - server busy");
14487f7249fSJagannathan Raman         return;
14587f7249fSJagannathan Raman     }
14687f7249fSJagannathan Raman 
1478f9a9259SJagannathan Raman     qapi_free_SocketAddress(o->socket);
1488f9a9259SJagannathan Raman 
1498f9a9259SJagannathan Raman     o->socket = NULL;
1508f9a9259SJagannathan Raman 
1518f9a9259SJagannathan Raman     visit_type_SocketAddress(v, name, &o->socket, errp);
1528f9a9259SJagannathan Raman 
1538f9a9259SJagannathan Raman     if (o->socket->type != SOCKET_ADDRESS_TYPE_UNIX) {
1548f9a9259SJagannathan Raman         error_setg(errp, "vfu: Unsupported socket type - %s",
1558f9a9259SJagannathan Raman                    SocketAddressType_str(o->socket->type));
1568f9a9259SJagannathan Raman         qapi_free_SocketAddress(o->socket);
1578f9a9259SJagannathan Raman         o->socket = NULL;
1588f9a9259SJagannathan Raman         return;
1598f9a9259SJagannathan Raman     }
1608f9a9259SJagannathan Raman 
1618f9a9259SJagannathan Raman     trace_vfu_prop("socket", o->socket->u.q_unix.path);
16287f7249fSJagannathan Raman 
16387f7249fSJagannathan Raman     vfu_object_init_ctx(o, errp);
1648f9a9259SJagannathan Raman }
1658f9a9259SJagannathan Raman 
1668f9a9259SJagannathan Raman static void vfu_object_set_device(Object *obj, const char *str, Error **errp)
1678f9a9259SJagannathan Raman {
1688f9a9259SJagannathan Raman     VfuObject *o = VFU_OBJECT(obj);
1698f9a9259SJagannathan Raman 
17087f7249fSJagannathan Raman     if (o->vfu_ctx) {
17187f7249fSJagannathan Raman         error_setg(errp, "vfu: Unable to set device property - server busy");
17287f7249fSJagannathan Raman         return;
17387f7249fSJagannathan Raman     }
17487f7249fSJagannathan Raman 
1758f9a9259SJagannathan Raman     g_free(o->device);
1768f9a9259SJagannathan Raman 
1778f9a9259SJagannathan Raman     o->device = g_strdup(str);
1788f9a9259SJagannathan Raman 
1798f9a9259SJagannathan Raman     trace_vfu_prop("device", str);
18087f7249fSJagannathan Raman 
18187f7249fSJagannathan Raman     vfu_object_init_ctx(o, errp);
18287f7249fSJagannathan Raman }
18387f7249fSJagannathan Raman 
1849fb3fba1SJagannathan Raman static void vfu_object_ctx_run(void *opaque)
1859fb3fba1SJagannathan Raman {
1869fb3fba1SJagannathan Raman     VfuObject *o = opaque;
1879fb3fba1SJagannathan Raman     const char *vfu_id;
1889fb3fba1SJagannathan Raman     char *vfu_path, *pci_dev_path;
1899fb3fba1SJagannathan Raman     int ret = -1;
1909fb3fba1SJagannathan Raman 
1919fb3fba1SJagannathan Raman     while (ret != 0) {
1929fb3fba1SJagannathan Raman         ret = vfu_run_ctx(o->vfu_ctx);
1939fb3fba1SJagannathan Raman         if (ret < 0) {
1949fb3fba1SJagannathan Raman             if (errno == EINTR) {
1959fb3fba1SJagannathan Raman                 continue;
1969fb3fba1SJagannathan Raman             } else if (errno == ENOTCONN) {
1979fb3fba1SJagannathan Raman                 vfu_id = object_get_canonical_path_component(OBJECT(o));
1989fb3fba1SJagannathan Raman                 vfu_path = object_get_canonical_path(OBJECT(o));
1999fb3fba1SJagannathan Raman                 g_assert(o->pci_dev);
2009fb3fba1SJagannathan Raman                 pci_dev_path = object_get_canonical_path(OBJECT(o->pci_dev));
2019fb3fba1SJagannathan Raman                  /* o->device is a required property and is non-NULL here */
2029fb3fba1SJagannathan Raman                 g_assert(o->device);
2039fb3fba1SJagannathan Raman                 qapi_event_send_vfu_client_hangup(vfu_id, vfu_path,
2049fb3fba1SJagannathan Raman                                                   o->device, pci_dev_path);
2059fb3fba1SJagannathan Raman                 qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL);
2069fb3fba1SJagannathan Raman                 o->vfu_poll_fd = -1;
2079fb3fba1SJagannathan Raman                 object_unparent(OBJECT(o));
2089fb3fba1SJagannathan Raman                 g_free(vfu_path);
2099fb3fba1SJagannathan Raman                 g_free(pci_dev_path);
2109fb3fba1SJagannathan Raman                 break;
2119fb3fba1SJagannathan Raman             } else {
2129fb3fba1SJagannathan Raman                 VFU_OBJECT_ERROR(o, "vfu: Failed to run device %s - %s",
2139fb3fba1SJagannathan Raman                                  o->device, strerror(errno));
2149fb3fba1SJagannathan Raman                 break;
2159fb3fba1SJagannathan Raman             }
2169fb3fba1SJagannathan Raman         }
2179fb3fba1SJagannathan Raman     }
2189fb3fba1SJagannathan Raman }
2199fb3fba1SJagannathan Raman 
2209fb3fba1SJagannathan Raman static void vfu_object_attach_ctx(void *opaque)
2219fb3fba1SJagannathan Raman {
2229fb3fba1SJagannathan Raman     VfuObject *o = opaque;
2239fb3fba1SJagannathan Raman     GPollFD pfds[1];
2249fb3fba1SJagannathan Raman     int ret;
2259fb3fba1SJagannathan Raman 
2269fb3fba1SJagannathan Raman     qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL);
2279fb3fba1SJagannathan Raman 
2289fb3fba1SJagannathan Raman     pfds[0].fd = o->vfu_poll_fd;
2299fb3fba1SJagannathan Raman     pfds[0].events = G_IO_IN | G_IO_HUP | G_IO_ERR;
2309fb3fba1SJagannathan Raman 
2319fb3fba1SJagannathan Raman retry_attach:
2329fb3fba1SJagannathan Raman     ret = vfu_attach_ctx(o->vfu_ctx);
2339fb3fba1SJagannathan Raman     if (ret < 0 && (errno == EAGAIN || errno == EWOULDBLOCK)) {
2349fb3fba1SJagannathan Raman         /**
2359fb3fba1SJagannathan Raman          * vfu_object_attach_ctx can block QEMU's main loop
2369fb3fba1SJagannathan Raman          * during attach - the monitor and other IO
2379fb3fba1SJagannathan Raman          * could be unresponsive during this time.
2389fb3fba1SJagannathan Raman          */
2399fb3fba1SJagannathan Raman         (void)qemu_poll_ns(pfds, 1, 500 * (int64_t)SCALE_MS);
2409fb3fba1SJagannathan Raman         goto retry_attach;
2419fb3fba1SJagannathan Raman     } else if (ret < 0) {
2429fb3fba1SJagannathan Raman         VFU_OBJECT_ERROR(o, "vfu: Failed to attach device %s to context - %s",
2439fb3fba1SJagannathan Raman                          o->device, strerror(errno));
2449fb3fba1SJagannathan Raman         return;
2459fb3fba1SJagannathan Raman     }
2469fb3fba1SJagannathan Raman 
2479fb3fba1SJagannathan Raman     o->vfu_poll_fd = vfu_get_poll_fd(o->vfu_ctx);
2489fb3fba1SJagannathan Raman     if (o->vfu_poll_fd < 0) {
2499fb3fba1SJagannathan Raman         VFU_OBJECT_ERROR(o, "vfu: Failed to get poll fd %s", o->device);
2509fb3fba1SJagannathan Raman         return;
2519fb3fba1SJagannathan Raman     }
2529fb3fba1SJagannathan Raman 
2539fb3fba1SJagannathan Raman     qemu_set_fd_handler(o->vfu_poll_fd, vfu_object_ctx_run, NULL, o);
2549fb3fba1SJagannathan Raman }
2559fb3fba1SJagannathan Raman 
25690072f29SJagannathan Raman static ssize_t vfu_object_cfg_access(vfu_ctx_t *vfu_ctx, char * const buf,
25790072f29SJagannathan Raman                                      size_t count, loff_t offset,
25890072f29SJagannathan Raman                                      const bool is_write)
25990072f29SJagannathan Raman {
26090072f29SJagannathan Raman     VfuObject *o = vfu_get_private(vfu_ctx);
26190072f29SJagannathan Raman     uint32_t pci_access_width = sizeof(uint32_t);
26290072f29SJagannathan Raman     size_t bytes = count;
26390072f29SJagannathan Raman     uint32_t val = 0;
26490072f29SJagannathan Raman     char *ptr = buf;
26590072f29SJagannathan Raman     int len;
26690072f29SJagannathan Raman 
26790072f29SJagannathan Raman     /*
26890072f29SJagannathan Raman      * Writes to the BAR registers would trigger an update to the
26990072f29SJagannathan Raman      * global Memory and IO AddressSpaces. But the remote device
27090072f29SJagannathan Raman      * never uses the global AddressSpaces, therefore overlapping
27190072f29SJagannathan Raman      * memory regions are not a problem
27290072f29SJagannathan Raman      */
27390072f29SJagannathan Raman     while (bytes > 0) {
27490072f29SJagannathan Raman         len = (bytes > pci_access_width) ? pci_access_width : bytes;
27590072f29SJagannathan Raman         if (is_write) {
27690072f29SJagannathan Raman             memcpy(&val, ptr, len);
27790072f29SJagannathan Raman             pci_host_config_write_common(o->pci_dev, offset,
27890072f29SJagannathan Raman                                          pci_config_size(o->pci_dev),
27990072f29SJagannathan Raman                                          val, len);
28090072f29SJagannathan Raman             trace_vfu_cfg_write(offset, val);
28190072f29SJagannathan Raman         } else {
28290072f29SJagannathan Raman             val = pci_host_config_read_common(o->pci_dev, offset,
28390072f29SJagannathan Raman                                               pci_config_size(o->pci_dev), len);
28490072f29SJagannathan Raman             memcpy(ptr, &val, len);
28590072f29SJagannathan Raman             trace_vfu_cfg_read(offset, val);
28690072f29SJagannathan Raman         }
28790072f29SJagannathan Raman         offset += len;
28890072f29SJagannathan Raman         ptr += len;
28990072f29SJagannathan Raman         bytes -= len;
29090072f29SJagannathan Raman     }
29190072f29SJagannathan Raman 
29290072f29SJagannathan Raman     return count;
29390072f29SJagannathan Raman }
29490072f29SJagannathan Raman 
29515ccf9beSJagannathan Raman static void dma_register(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info)
29615ccf9beSJagannathan Raman {
29715ccf9beSJagannathan Raman     VfuObject *o = vfu_get_private(vfu_ctx);
29815ccf9beSJagannathan Raman     AddressSpace *dma_as = NULL;
29915ccf9beSJagannathan Raman     MemoryRegion *subregion = NULL;
30015ccf9beSJagannathan Raman     g_autofree char *name = NULL;
30115ccf9beSJagannathan Raman     struct iovec *iov = &info->iova;
30215ccf9beSJagannathan Raman 
30315ccf9beSJagannathan Raman     if (!info->vaddr) {
30415ccf9beSJagannathan Raman         return;
30515ccf9beSJagannathan Raman     }
30615ccf9beSJagannathan Raman 
30715ccf9beSJagannathan Raman     name = g_strdup_printf("mem-%s-%"PRIx64"", o->device,
30815ccf9beSJagannathan Raman                            (uint64_t)info->vaddr);
30915ccf9beSJagannathan Raman 
31015ccf9beSJagannathan Raman     subregion = g_new0(MemoryRegion, 1);
31115ccf9beSJagannathan Raman 
31215ccf9beSJagannathan Raman     memory_region_init_ram_ptr(subregion, NULL, name,
31315ccf9beSJagannathan Raman                                iov->iov_len, info->vaddr);
31415ccf9beSJagannathan Raman 
31515ccf9beSJagannathan Raman     dma_as = pci_device_iommu_address_space(o->pci_dev);
31615ccf9beSJagannathan Raman 
31715ccf9beSJagannathan Raman     memory_region_add_subregion(dma_as->root, (hwaddr)iov->iov_base, subregion);
31815ccf9beSJagannathan Raman 
31915ccf9beSJagannathan Raman     trace_vfu_dma_register((uint64_t)iov->iov_base, iov->iov_len);
32015ccf9beSJagannathan Raman }
32115ccf9beSJagannathan Raman 
32215ccf9beSJagannathan Raman static void dma_unregister(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info)
32315ccf9beSJagannathan Raman {
32415ccf9beSJagannathan Raman     VfuObject *o = vfu_get_private(vfu_ctx);
32515ccf9beSJagannathan Raman     AddressSpace *dma_as = NULL;
32615ccf9beSJagannathan Raman     MemoryRegion *mr = NULL;
32715ccf9beSJagannathan Raman     ram_addr_t offset;
32815ccf9beSJagannathan Raman 
32915ccf9beSJagannathan Raman     mr = memory_region_from_host(info->vaddr, &offset);
33015ccf9beSJagannathan Raman     if (!mr) {
33115ccf9beSJagannathan Raman         return;
33215ccf9beSJagannathan Raman     }
33315ccf9beSJagannathan Raman 
33415ccf9beSJagannathan Raman     dma_as = pci_device_iommu_address_space(o->pci_dev);
33515ccf9beSJagannathan Raman 
33615ccf9beSJagannathan Raman     memory_region_del_subregion(dma_as->root, mr);
33715ccf9beSJagannathan Raman 
33815ccf9beSJagannathan Raman     object_unparent((OBJECT(mr)));
33915ccf9beSJagannathan Raman 
34015ccf9beSJagannathan Raman     trace_vfu_dma_unregister((uint64_t)info->iova.iov_base);
34115ccf9beSJagannathan Raman }
34215ccf9beSJagannathan Raman 
3433123f93dSJagannathan Raman static int vfu_object_mr_rw(MemoryRegion *mr, uint8_t *buf, hwaddr offset,
3443123f93dSJagannathan Raman                             hwaddr size, const bool is_write)
3453123f93dSJagannathan Raman {
3463123f93dSJagannathan Raman     uint8_t *ptr = buf;
3473123f93dSJagannathan Raman     bool release_lock = false;
3483123f93dSJagannathan Raman     uint8_t *ram_ptr = NULL;
3493123f93dSJagannathan Raman     MemTxResult result;
3503123f93dSJagannathan Raman     int access_size;
3513123f93dSJagannathan Raman     uint64_t val;
3523123f93dSJagannathan Raman 
3533123f93dSJagannathan Raman     if (memory_access_is_direct(mr, is_write)) {
3543123f93dSJagannathan Raman         /**
3553123f93dSJagannathan Raman          * Some devices expose a PCI expansion ROM, which could be buffer
3563123f93dSJagannathan Raman          * based as compared to other regions which are primarily based on
3573123f93dSJagannathan Raman          * MemoryRegionOps. memory_region_find() would already check
3583123f93dSJagannathan Raman          * for buffer overflow, we don't need to repeat it here.
3593123f93dSJagannathan Raman          */
3603123f93dSJagannathan Raman         ram_ptr = memory_region_get_ram_ptr(mr);
3613123f93dSJagannathan Raman 
3623123f93dSJagannathan Raman         if (is_write) {
3633123f93dSJagannathan Raman             memcpy((ram_ptr + offset), buf, size);
3643123f93dSJagannathan Raman         } else {
3653123f93dSJagannathan Raman             memcpy(buf, (ram_ptr + offset), size);
3663123f93dSJagannathan Raman         }
3673123f93dSJagannathan Raman 
3683123f93dSJagannathan Raman         return 0;
3693123f93dSJagannathan Raman     }
3703123f93dSJagannathan Raman 
3713123f93dSJagannathan Raman     while (size) {
3723123f93dSJagannathan Raman         /**
3733123f93dSJagannathan Raman          * The read/write logic used below is similar to the ones in
3743123f93dSJagannathan Raman          * flatview_read/write_continue()
3753123f93dSJagannathan Raman          */
3763123f93dSJagannathan Raman         release_lock = prepare_mmio_access(mr);
3773123f93dSJagannathan Raman 
3783123f93dSJagannathan Raman         access_size = memory_access_size(mr, size, offset);
3793123f93dSJagannathan Raman 
3803123f93dSJagannathan Raman         if (is_write) {
3813123f93dSJagannathan Raman             val = ldn_he_p(ptr, access_size);
3823123f93dSJagannathan Raman 
3833123f93dSJagannathan Raman             result = memory_region_dispatch_write(mr, offset, val,
3843123f93dSJagannathan Raman                                                   size_memop(access_size),
3853123f93dSJagannathan Raman                                                   MEMTXATTRS_UNSPECIFIED);
3863123f93dSJagannathan Raman         } else {
3873123f93dSJagannathan Raman             result = memory_region_dispatch_read(mr, offset, &val,
3883123f93dSJagannathan Raman                                                  size_memop(access_size),
3893123f93dSJagannathan Raman                                                  MEMTXATTRS_UNSPECIFIED);
3903123f93dSJagannathan Raman 
3913123f93dSJagannathan Raman             stn_he_p(ptr, access_size, val);
3923123f93dSJagannathan Raman         }
3933123f93dSJagannathan Raman 
3943123f93dSJagannathan Raman         if (release_lock) {
3953123f93dSJagannathan Raman             qemu_mutex_unlock_iothread();
3963123f93dSJagannathan Raman             release_lock = false;
3973123f93dSJagannathan Raman         }
3983123f93dSJagannathan Raman 
3993123f93dSJagannathan Raman         if (result != MEMTX_OK) {
4003123f93dSJagannathan Raman             return -1;
4013123f93dSJagannathan Raman         }
4023123f93dSJagannathan Raman 
4033123f93dSJagannathan Raman         size -= access_size;
4043123f93dSJagannathan Raman         ptr += access_size;
4053123f93dSJagannathan Raman         offset += access_size;
4063123f93dSJagannathan Raman     }
4073123f93dSJagannathan Raman 
4083123f93dSJagannathan Raman     return 0;
4093123f93dSJagannathan Raman }
4103123f93dSJagannathan Raman 
4113123f93dSJagannathan Raman static size_t vfu_object_bar_rw(PCIDevice *pci_dev, int pci_bar,
4123123f93dSJagannathan Raman                                 hwaddr bar_offset, char * const buf,
4133123f93dSJagannathan Raman                                 hwaddr len, const bool is_write)
4143123f93dSJagannathan Raman {
4153123f93dSJagannathan Raman     MemoryRegionSection section = { 0 };
4163123f93dSJagannathan Raman     uint8_t *ptr = (uint8_t *)buf;
4173123f93dSJagannathan Raman     MemoryRegion *section_mr = NULL;
4183123f93dSJagannathan Raman     uint64_t section_size;
4193123f93dSJagannathan Raman     hwaddr section_offset;
4203123f93dSJagannathan Raman     hwaddr size = 0;
4213123f93dSJagannathan Raman 
4223123f93dSJagannathan Raman     while (len) {
4233123f93dSJagannathan Raman         section = memory_region_find(pci_dev->io_regions[pci_bar].memory,
4243123f93dSJagannathan Raman                                      bar_offset, len);
4253123f93dSJagannathan Raman 
4263123f93dSJagannathan Raman         if (!section.mr) {
4273123f93dSJagannathan Raman             warn_report("vfu: invalid address 0x%"PRIx64"", bar_offset);
4283123f93dSJagannathan Raman             return size;
4293123f93dSJagannathan Raman         }
4303123f93dSJagannathan Raman 
4313123f93dSJagannathan Raman         section_mr = section.mr;
4323123f93dSJagannathan Raman         section_offset = section.offset_within_region;
4333123f93dSJagannathan Raman         section_size = int128_get64(section.size);
4343123f93dSJagannathan Raman 
4353123f93dSJagannathan Raman         if (is_write && section_mr->readonly) {
4363123f93dSJagannathan Raman             warn_report("vfu: attempting to write to readonly region in "
4373123f93dSJagannathan Raman                         "bar %d - [0x%"PRIx64" - 0x%"PRIx64"]",
4383123f93dSJagannathan Raman                         pci_bar, bar_offset,
4393123f93dSJagannathan Raman                         (bar_offset + section_size));
4403123f93dSJagannathan Raman             memory_region_unref(section_mr);
4413123f93dSJagannathan Raman             return size;
4423123f93dSJagannathan Raman         }
4433123f93dSJagannathan Raman 
4443123f93dSJagannathan Raman         if (vfu_object_mr_rw(section_mr, ptr, section_offset,
4453123f93dSJagannathan Raman                              section_size, is_write)) {
4463123f93dSJagannathan Raman             warn_report("vfu: failed to %s "
4473123f93dSJagannathan Raman                         "[0x%"PRIx64" - 0x%"PRIx64"] in bar %d",
4483123f93dSJagannathan Raman                         is_write ? "write to" : "read from", bar_offset,
4493123f93dSJagannathan Raman                         (bar_offset + section_size), pci_bar);
4503123f93dSJagannathan Raman             memory_region_unref(section_mr);
4513123f93dSJagannathan Raman             return size;
4523123f93dSJagannathan Raman         }
4533123f93dSJagannathan Raman 
4543123f93dSJagannathan Raman         size += section_size;
4553123f93dSJagannathan Raman         bar_offset += section_size;
4563123f93dSJagannathan Raman         ptr += section_size;
4573123f93dSJagannathan Raman         len -= section_size;
4583123f93dSJagannathan Raman 
4593123f93dSJagannathan Raman         memory_region_unref(section_mr);
4603123f93dSJagannathan Raman     }
4613123f93dSJagannathan Raman 
4623123f93dSJagannathan Raman     return size;
4633123f93dSJagannathan Raman }
4643123f93dSJagannathan Raman 
4653123f93dSJagannathan Raman /**
4663123f93dSJagannathan Raman  * VFU_OBJECT_BAR_HANDLER - macro for defining handlers for PCI BARs.
4673123f93dSJagannathan Raman  *
4683123f93dSJagannathan Raman  * To create handler for BAR number 2, VFU_OBJECT_BAR_HANDLER(2) would
4693123f93dSJagannathan Raman  * define vfu_object_bar2_handler
4703123f93dSJagannathan Raman  */
4713123f93dSJagannathan Raman #define VFU_OBJECT_BAR_HANDLER(BAR_NO)                                         \
4723123f93dSJagannathan Raman     static ssize_t vfu_object_bar##BAR_NO##_handler(vfu_ctx_t *vfu_ctx,        \
4733123f93dSJagannathan Raman                                         char * const buf, size_t count,        \
4743123f93dSJagannathan Raman                                         loff_t offset, const bool is_write)    \
4753123f93dSJagannathan Raman     {                                                                          \
4763123f93dSJagannathan Raman         VfuObject *o = vfu_get_private(vfu_ctx);                               \
4773123f93dSJagannathan Raman         PCIDevice *pci_dev = o->pci_dev;                                       \
4783123f93dSJagannathan Raman                                                                                \
4793123f93dSJagannathan Raman         return vfu_object_bar_rw(pci_dev, BAR_NO, offset,                      \
4803123f93dSJagannathan Raman                                  buf, count, is_write);                        \
4813123f93dSJagannathan Raman     }                                                                          \
4823123f93dSJagannathan Raman 
4833123f93dSJagannathan Raman VFU_OBJECT_BAR_HANDLER(0)
4843123f93dSJagannathan Raman VFU_OBJECT_BAR_HANDLER(1)
4853123f93dSJagannathan Raman VFU_OBJECT_BAR_HANDLER(2)
4863123f93dSJagannathan Raman VFU_OBJECT_BAR_HANDLER(3)
4873123f93dSJagannathan Raman VFU_OBJECT_BAR_HANDLER(4)
4883123f93dSJagannathan Raman VFU_OBJECT_BAR_HANDLER(5)
4893123f93dSJagannathan Raman VFU_OBJECT_BAR_HANDLER(6)
4903123f93dSJagannathan Raman 
4913123f93dSJagannathan Raman static vfu_region_access_cb_t *vfu_object_bar_handlers[PCI_NUM_REGIONS] = {
4923123f93dSJagannathan Raman     &vfu_object_bar0_handler,
4933123f93dSJagannathan Raman     &vfu_object_bar1_handler,
4943123f93dSJagannathan Raman     &vfu_object_bar2_handler,
4953123f93dSJagannathan Raman     &vfu_object_bar3_handler,
4963123f93dSJagannathan Raman     &vfu_object_bar4_handler,
4973123f93dSJagannathan Raman     &vfu_object_bar5_handler,
4983123f93dSJagannathan Raman     &vfu_object_bar6_handler,
4993123f93dSJagannathan Raman };
5003123f93dSJagannathan Raman 
5013123f93dSJagannathan Raman /**
5023123f93dSJagannathan Raman  * vfu_object_register_bars - Identify active BAR regions of pdev and setup
5033123f93dSJagannathan Raman  *                            callbacks to handle read/write accesses
5043123f93dSJagannathan Raman  */
5053123f93dSJagannathan Raman static void vfu_object_register_bars(vfu_ctx_t *vfu_ctx, PCIDevice *pdev)
5063123f93dSJagannathan Raman {
5073123f93dSJagannathan Raman     int flags = VFU_REGION_FLAG_RW;
5083123f93dSJagannathan Raman     int i;
5093123f93dSJagannathan Raman 
5103123f93dSJagannathan Raman     for (i = 0; i < PCI_NUM_REGIONS; i++) {
5113123f93dSJagannathan Raman         if (!pdev->io_regions[i].size) {
5123123f93dSJagannathan Raman             continue;
5133123f93dSJagannathan Raman         }
5143123f93dSJagannathan Raman 
5153123f93dSJagannathan Raman         if ((i == VFU_PCI_DEV_ROM_REGION_IDX) ||
5163123f93dSJagannathan Raman             pdev->io_regions[i].memory->readonly) {
5173123f93dSJagannathan Raman             flags &= ~VFU_REGION_FLAG_WRITE;
5183123f93dSJagannathan Raman         }
5193123f93dSJagannathan Raman 
5203123f93dSJagannathan Raman         vfu_setup_region(vfu_ctx, VFU_PCI_DEV_BAR0_REGION_IDX + i,
5213123f93dSJagannathan Raman                          (size_t)pdev->io_regions[i].size,
5223123f93dSJagannathan Raman                          vfu_object_bar_handlers[i],
5233123f93dSJagannathan Raman                          flags, NULL, 0, -1, 0);
5243123f93dSJagannathan Raman 
5253123f93dSJagannathan Raman         trace_vfu_bar_register(i, pdev->io_regions[i].addr,
5263123f93dSJagannathan Raman                                pdev->io_regions[i].size);
5273123f93dSJagannathan Raman     }
5283123f93dSJagannathan Raman }
5293123f93dSJagannathan Raman 
530*08cf3dc6SJagannathan Raman static int vfu_object_map_irq(PCIDevice *pci_dev, int intx)
531*08cf3dc6SJagannathan Raman {
532*08cf3dc6SJagannathan Raman     int pci_bdf = PCI_BUILD_BDF(pci_bus_num(pci_get_bus(pci_dev)),
533*08cf3dc6SJagannathan Raman                                 pci_dev->devfn);
534*08cf3dc6SJagannathan Raman 
535*08cf3dc6SJagannathan Raman     return pci_bdf;
536*08cf3dc6SJagannathan Raman }
537*08cf3dc6SJagannathan Raman 
538*08cf3dc6SJagannathan Raman static void vfu_object_set_irq(void *opaque, int pirq, int level)
539*08cf3dc6SJagannathan Raman {
540*08cf3dc6SJagannathan Raman     PCIBus *pci_bus = opaque;
541*08cf3dc6SJagannathan Raman     PCIDevice *pci_dev = NULL;
542*08cf3dc6SJagannathan Raman     vfu_ctx_t *vfu_ctx = NULL;
543*08cf3dc6SJagannathan Raman     int pci_bus_num, devfn;
544*08cf3dc6SJagannathan Raman 
545*08cf3dc6SJagannathan Raman     if (level) {
546*08cf3dc6SJagannathan Raman         pci_bus_num = PCI_BUS_NUM(pirq);
547*08cf3dc6SJagannathan Raman         devfn = PCI_BDF_TO_DEVFN(pirq);
548*08cf3dc6SJagannathan Raman 
549*08cf3dc6SJagannathan Raman         /*
550*08cf3dc6SJagannathan Raman          * pci_find_device() performs at O(1) if the device is attached
551*08cf3dc6SJagannathan Raman          * to the root PCI bus. Whereas, if the device is attached to a
552*08cf3dc6SJagannathan Raman          * secondary PCI bus (such as when a root port is involved),
553*08cf3dc6SJagannathan Raman          * finding the parent PCI bus could take O(n)
554*08cf3dc6SJagannathan Raman          */
555*08cf3dc6SJagannathan Raman         pci_dev = pci_find_device(pci_bus, pci_bus_num, devfn);
556*08cf3dc6SJagannathan Raman 
557*08cf3dc6SJagannathan Raman         vfu_ctx = pci_dev->irq_opaque;
558*08cf3dc6SJagannathan Raman 
559*08cf3dc6SJagannathan Raman         g_assert(vfu_ctx);
560*08cf3dc6SJagannathan Raman 
561*08cf3dc6SJagannathan Raman         vfu_irq_trigger(vfu_ctx, 0);
562*08cf3dc6SJagannathan Raman     }
563*08cf3dc6SJagannathan Raman }
564*08cf3dc6SJagannathan Raman 
565*08cf3dc6SJagannathan Raman static MSIMessage vfu_object_msi_prepare_msg(PCIDevice *pci_dev,
566*08cf3dc6SJagannathan Raman                                              unsigned int vector)
567*08cf3dc6SJagannathan Raman {
568*08cf3dc6SJagannathan Raman     MSIMessage msg;
569*08cf3dc6SJagannathan Raman 
570*08cf3dc6SJagannathan Raman     msg.address = 0;
571*08cf3dc6SJagannathan Raman     msg.data = vector;
572*08cf3dc6SJagannathan Raman 
573*08cf3dc6SJagannathan Raman     return msg;
574*08cf3dc6SJagannathan Raman }
575*08cf3dc6SJagannathan Raman 
576*08cf3dc6SJagannathan Raman static void vfu_object_msi_trigger(PCIDevice *pci_dev, MSIMessage msg)
577*08cf3dc6SJagannathan Raman {
578*08cf3dc6SJagannathan Raman     vfu_ctx_t *vfu_ctx = pci_dev->irq_opaque;
579*08cf3dc6SJagannathan Raman 
580*08cf3dc6SJagannathan Raman     vfu_irq_trigger(vfu_ctx, msg.data);
581*08cf3dc6SJagannathan Raman }
582*08cf3dc6SJagannathan Raman 
583*08cf3dc6SJagannathan Raman static void vfu_object_setup_msi_cbs(VfuObject *o)
584*08cf3dc6SJagannathan Raman {
585*08cf3dc6SJagannathan Raman     o->default_msi_trigger = o->pci_dev->msi_trigger;
586*08cf3dc6SJagannathan Raman     o->default_msi_prepare_message = o->pci_dev->msi_prepare_message;
587*08cf3dc6SJagannathan Raman     o->default_msix_prepare_message = o->pci_dev->msix_prepare_message;
588*08cf3dc6SJagannathan Raman 
589*08cf3dc6SJagannathan Raman     o->pci_dev->msi_trigger = vfu_object_msi_trigger;
590*08cf3dc6SJagannathan Raman     o->pci_dev->msi_prepare_message = vfu_object_msi_prepare_msg;
591*08cf3dc6SJagannathan Raman     o->pci_dev->msix_prepare_message = vfu_object_msi_prepare_msg;
592*08cf3dc6SJagannathan Raman }
593*08cf3dc6SJagannathan Raman 
594*08cf3dc6SJagannathan Raman static void vfu_object_restore_msi_cbs(VfuObject *o)
595*08cf3dc6SJagannathan Raman {
596*08cf3dc6SJagannathan Raman     o->pci_dev->msi_trigger = o->default_msi_trigger;
597*08cf3dc6SJagannathan Raman     o->pci_dev->msi_prepare_message = o->default_msi_prepare_message;
598*08cf3dc6SJagannathan Raman     o->pci_dev->msix_prepare_message = o->default_msix_prepare_message;
599*08cf3dc6SJagannathan Raman }
600*08cf3dc6SJagannathan Raman 
601*08cf3dc6SJagannathan Raman static void vfu_msix_irq_state(vfu_ctx_t *vfu_ctx, uint32_t start,
602*08cf3dc6SJagannathan Raman                                uint32_t count, bool mask)
603*08cf3dc6SJagannathan Raman {
604*08cf3dc6SJagannathan Raman     VfuObject *o = vfu_get_private(vfu_ctx);
605*08cf3dc6SJagannathan Raman     Error *err = NULL;
606*08cf3dc6SJagannathan Raman     uint32_t vector;
607*08cf3dc6SJagannathan Raman 
608*08cf3dc6SJagannathan Raman     for (vector = start; vector < count; vector++) {
609*08cf3dc6SJagannathan Raman         msix_set_mask(o->pci_dev, vector, mask, &err);
610*08cf3dc6SJagannathan Raman         if (err) {
611*08cf3dc6SJagannathan Raman             VFU_OBJECT_ERROR(o, "vfu: %s: %s", o->device,
612*08cf3dc6SJagannathan Raman                              error_get_pretty(err));
613*08cf3dc6SJagannathan Raman             error_free(err);
614*08cf3dc6SJagannathan Raman             err = NULL;
615*08cf3dc6SJagannathan Raman         }
616*08cf3dc6SJagannathan Raman     }
617*08cf3dc6SJagannathan Raman }
618*08cf3dc6SJagannathan Raman 
619*08cf3dc6SJagannathan Raman static void vfu_msi_irq_state(vfu_ctx_t *vfu_ctx, uint32_t start,
620*08cf3dc6SJagannathan Raman                               uint32_t count, bool mask)
621*08cf3dc6SJagannathan Raman {
622*08cf3dc6SJagannathan Raman     VfuObject *o = vfu_get_private(vfu_ctx);
623*08cf3dc6SJagannathan Raman     Error *err = NULL;
624*08cf3dc6SJagannathan Raman     uint32_t vector;
625*08cf3dc6SJagannathan Raman 
626*08cf3dc6SJagannathan Raman     for (vector = start; vector < count; vector++) {
627*08cf3dc6SJagannathan Raman         msi_set_mask(o->pci_dev, vector, mask, &err);
628*08cf3dc6SJagannathan Raman         if (err) {
629*08cf3dc6SJagannathan Raman             VFU_OBJECT_ERROR(o, "vfu: %s: %s", o->device,
630*08cf3dc6SJagannathan Raman                              error_get_pretty(err));
631*08cf3dc6SJagannathan Raman             error_free(err);
632*08cf3dc6SJagannathan Raman             err = NULL;
633*08cf3dc6SJagannathan Raman         }
634*08cf3dc6SJagannathan Raman     }
635*08cf3dc6SJagannathan Raman }
636*08cf3dc6SJagannathan Raman 
637*08cf3dc6SJagannathan Raman static int vfu_object_setup_irqs(VfuObject *o, PCIDevice *pci_dev)
638*08cf3dc6SJagannathan Raman {
639*08cf3dc6SJagannathan Raman     vfu_ctx_t *vfu_ctx = o->vfu_ctx;
640*08cf3dc6SJagannathan Raman     int ret;
641*08cf3dc6SJagannathan Raman 
642*08cf3dc6SJagannathan Raman     ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_INTX_IRQ, 1);
643*08cf3dc6SJagannathan Raman     if (ret < 0) {
644*08cf3dc6SJagannathan Raman         return ret;
645*08cf3dc6SJagannathan Raman     }
646*08cf3dc6SJagannathan Raman 
647*08cf3dc6SJagannathan Raman     if (msix_nr_vectors_allocated(pci_dev)) {
648*08cf3dc6SJagannathan Raman         ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_MSIX_IRQ,
649*08cf3dc6SJagannathan Raman                                        msix_nr_vectors_allocated(pci_dev));
650*08cf3dc6SJagannathan Raman         vfu_setup_irq_state_callback(vfu_ctx, VFU_DEV_MSIX_IRQ,
651*08cf3dc6SJagannathan Raman                                      &vfu_msix_irq_state);
652*08cf3dc6SJagannathan Raman     } else if (msi_nr_vectors_allocated(pci_dev)) {
653*08cf3dc6SJagannathan Raman         ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_MSI_IRQ,
654*08cf3dc6SJagannathan Raman                                        msi_nr_vectors_allocated(pci_dev));
655*08cf3dc6SJagannathan Raman         vfu_setup_irq_state_callback(vfu_ctx, VFU_DEV_MSI_IRQ,
656*08cf3dc6SJagannathan Raman                                      &vfu_msi_irq_state);
657*08cf3dc6SJagannathan Raman     }
658*08cf3dc6SJagannathan Raman 
659*08cf3dc6SJagannathan Raman     if (ret < 0) {
660*08cf3dc6SJagannathan Raman         return ret;
661*08cf3dc6SJagannathan Raman     }
662*08cf3dc6SJagannathan Raman 
663*08cf3dc6SJagannathan Raman     vfu_object_setup_msi_cbs(o);
664*08cf3dc6SJagannathan Raman 
665*08cf3dc6SJagannathan Raman     pci_dev->irq_opaque = vfu_ctx;
666*08cf3dc6SJagannathan Raman 
667*08cf3dc6SJagannathan Raman     return 0;
668*08cf3dc6SJagannathan Raman }
669*08cf3dc6SJagannathan Raman 
670*08cf3dc6SJagannathan Raman void vfu_object_set_bus_irq(PCIBus *pci_bus)
671*08cf3dc6SJagannathan Raman {
672*08cf3dc6SJagannathan Raman     int bus_num = pci_bus_num(pci_bus);
673*08cf3dc6SJagannathan Raman     int max_bdf = PCI_BUILD_BDF(bus_num, PCI_DEVFN_MAX - 1);
674*08cf3dc6SJagannathan Raman 
675*08cf3dc6SJagannathan Raman     pci_bus_irqs(pci_bus, vfu_object_set_irq, vfu_object_map_irq, pci_bus,
676*08cf3dc6SJagannathan Raman                  max_bdf);
677*08cf3dc6SJagannathan Raman }
678*08cf3dc6SJagannathan Raman 
67987f7249fSJagannathan Raman /*
68087f7249fSJagannathan Raman  * TYPE_VFU_OBJECT depends on the availability of the 'socket' and 'device'
68187f7249fSJagannathan Raman  * properties. It also depends on devices instantiated in QEMU. These
68287f7249fSJagannathan Raman  * dependencies are not available during the instance_init phase of this
68387f7249fSJagannathan Raman  * object's life-cycle. As such, the server is initialized after the
68487f7249fSJagannathan Raman  * machine is setup. machine_init_done_notifier notifies TYPE_VFU_OBJECT
68587f7249fSJagannathan Raman  * when the machine is setup, and the dependencies are available.
68687f7249fSJagannathan Raman  */
68787f7249fSJagannathan Raman static void vfu_object_machine_done(Notifier *notifier, void *data)
68887f7249fSJagannathan Raman {
68987f7249fSJagannathan Raman     VfuObject *o = container_of(notifier, VfuObject, machine_done);
69087f7249fSJagannathan Raman     Error *err = NULL;
69187f7249fSJagannathan Raman 
69287f7249fSJagannathan Raman     vfu_object_init_ctx(o, &err);
69387f7249fSJagannathan Raman 
69487f7249fSJagannathan Raman     if (err) {
69587f7249fSJagannathan Raman         error_propagate(&error_abort, err);
69687f7249fSJagannathan Raman     }
69787f7249fSJagannathan Raman }
69887f7249fSJagannathan Raman 
6999fb3fba1SJagannathan Raman /**
7009fb3fba1SJagannathan Raman  * vfu_object_init_ctx: Create and initialize libvfio-user context. Add
7019fb3fba1SJagannathan Raman  *     an unplug blocker for the associated PCI device. Setup a FD handler
7029fb3fba1SJagannathan Raman  *     to process incoming messages in the context's socket.
7039fb3fba1SJagannathan Raman  *
7049fb3fba1SJagannathan Raman  *     The socket and device properties are mandatory, and this function
7059fb3fba1SJagannathan Raman  *     will not create the context without them - the setters for these
7069fb3fba1SJagannathan Raman  *     properties should call this function when the property is set. The
7079fb3fba1SJagannathan Raman  *     machine should also be ready when this function is invoked - it is
7089fb3fba1SJagannathan Raman  *     because QEMU objects are initialized before devices, and the
7099fb3fba1SJagannathan Raman  *     associated PCI device wouldn't be available at the object
7109fb3fba1SJagannathan Raman  *     initialization time. Until these conditions are satisfied, this
7119fb3fba1SJagannathan Raman  *     function would return early without performing any task.
7129fb3fba1SJagannathan Raman  */
71387f7249fSJagannathan Raman static void vfu_object_init_ctx(VfuObject *o, Error **errp)
71487f7249fSJagannathan Raman {
71587f7249fSJagannathan Raman     ERRP_GUARD();
716a6e8d6d9SJagannathan Raman     DeviceState *dev = NULL;
717a6e8d6d9SJagannathan Raman     vfu_pci_type_t pci_type = VFU_PCI_TYPE_CONVENTIONAL;
718a6e8d6d9SJagannathan Raman     int ret;
71987f7249fSJagannathan Raman 
72087f7249fSJagannathan Raman     if (o->vfu_ctx || !o->socket || !o->device ||
72187f7249fSJagannathan Raman             !phase_check(PHASE_MACHINE_READY)) {
72287f7249fSJagannathan Raman         return;
72387f7249fSJagannathan Raman     }
72487f7249fSJagannathan Raman 
72587f7249fSJagannathan Raman     if (o->err) {
72687f7249fSJagannathan Raman         error_propagate(errp, o->err);
72787f7249fSJagannathan Raman         o->err = NULL;
72887f7249fSJagannathan Raman         return;
72987f7249fSJagannathan Raman     }
73087f7249fSJagannathan Raman 
7319fb3fba1SJagannathan Raman     o->vfu_ctx = vfu_create_ctx(VFU_TRANS_SOCK, o->socket->u.q_unix.path,
7329fb3fba1SJagannathan Raman                                 LIBVFIO_USER_FLAG_ATTACH_NB,
73387f7249fSJagannathan Raman                                 o, VFU_DEV_TYPE_PCI);
73487f7249fSJagannathan Raman     if (o->vfu_ctx == NULL) {
73587f7249fSJagannathan Raman         error_setg(errp, "vfu: Failed to create context - %s", strerror(errno));
73687f7249fSJagannathan Raman         return;
73787f7249fSJagannathan Raman     }
738a6e8d6d9SJagannathan Raman 
739a6e8d6d9SJagannathan Raman     dev = qdev_find_recursive(sysbus_get_default(), o->device);
740a6e8d6d9SJagannathan Raman     if (dev == NULL) {
741a6e8d6d9SJagannathan Raman         error_setg(errp, "vfu: Device %s not found", o->device);
742a6e8d6d9SJagannathan Raman         goto fail;
743a6e8d6d9SJagannathan Raman     }
744a6e8d6d9SJagannathan Raman 
745a6e8d6d9SJagannathan Raman     if (!object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
746a6e8d6d9SJagannathan Raman         error_setg(errp, "vfu: %s not a PCI device", o->device);
747a6e8d6d9SJagannathan Raman         goto fail;
748a6e8d6d9SJagannathan Raman     }
749a6e8d6d9SJagannathan Raman 
750a6e8d6d9SJagannathan Raman     o->pci_dev = PCI_DEVICE(dev);
751a6e8d6d9SJagannathan Raman 
752a6e8d6d9SJagannathan Raman     object_ref(OBJECT(o->pci_dev));
753a6e8d6d9SJagannathan Raman 
754a6e8d6d9SJagannathan Raman     if (pci_is_express(o->pci_dev)) {
755a6e8d6d9SJagannathan Raman         pci_type = VFU_PCI_TYPE_EXPRESS;
756a6e8d6d9SJagannathan Raman     }
757a6e8d6d9SJagannathan Raman 
758a6e8d6d9SJagannathan Raman     ret = vfu_pci_init(o->vfu_ctx, pci_type, PCI_HEADER_TYPE_NORMAL, 0);
759a6e8d6d9SJagannathan Raman     if (ret < 0) {
760a6e8d6d9SJagannathan Raman         error_setg(errp,
761a6e8d6d9SJagannathan Raman                    "vfu: Failed to attach PCI device %s to context - %s",
762a6e8d6d9SJagannathan Raman                    o->device, strerror(errno));
763a6e8d6d9SJagannathan Raman         goto fail;
764a6e8d6d9SJagannathan Raman     }
765a6e8d6d9SJagannathan Raman 
766a6e8d6d9SJagannathan Raman     error_setg(&o->unplug_blocker,
767a6e8d6d9SJagannathan Raman                "vfu: %s for %s must be deleted before unplugging",
768a6e8d6d9SJagannathan Raman                TYPE_VFU_OBJECT, o->device);
769a6e8d6d9SJagannathan Raman     qdev_add_unplug_blocker(DEVICE(o->pci_dev), o->unplug_blocker);
770a6e8d6d9SJagannathan Raman 
77190072f29SJagannathan Raman     ret = vfu_setup_region(o->vfu_ctx, VFU_PCI_DEV_CFG_REGION_IDX,
77290072f29SJagannathan Raman                            pci_config_size(o->pci_dev), &vfu_object_cfg_access,
77390072f29SJagannathan Raman                            VFU_REGION_FLAG_RW | VFU_REGION_FLAG_ALWAYS_CB,
77490072f29SJagannathan Raman                            NULL, 0, -1, 0);
77590072f29SJagannathan Raman     if (ret < 0) {
77690072f29SJagannathan Raman         error_setg(errp,
77790072f29SJagannathan Raman                    "vfu: Failed to setup config space handlers for %s- %s",
77890072f29SJagannathan Raman                    o->device, strerror(errno));
77990072f29SJagannathan Raman         goto fail;
78090072f29SJagannathan Raman     }
78190072f29SJagannathan Raman 
78215ccf9beSJagannathan Raman     ret = vfu_setup_device_dma(o->vfu_ctx, &dma_register, &dma_unregister);
78315ccf9beSJagannathan Raman     if (ret < 0) {
78415ccf9beSJagannathan Raman         error_setg(errp, "vfu: Failed to setup DMA handlers for %s",
78515ccf9beSJagannathan Raman                    o->device);
78615ccf9beSJagannathan Raman         goto fail;
78715ccf9beSJagannathan Raman     }
78815ccf9beSJagannathan Raman 
7893123f93dSJagannathan Raman     vfu_object_register_bars(o->vfu_ctx, o->pci_dev);
7903123f93dSJagannathan Raman 
791*08cf3dc6SJagannathan Raman     ret = vfu_object_setup_irqs(o, o->pci_dev);
792*08cf3dc6SJagannathan Raman     if (ret < 0) {
793*08cf3dc6SJagannathan Raman         error_setg(errp, "vfu: Failed to setup interrupts for %s",
794*08cf3dc6SJagannathan Raman                    o->device);
795*08cf3dc6SJagannathan Raman         goto fail;
796*08cf3dc6SJagannathan Raman     }
797*08cf3dc6SJagannathan Raman 
7989fb3fba1SJagannathan Raman     ret = vfu_realize_ctx(o->vfu_ctx);
7999fb3fba1SJagannathan Raman     if (ret < 0) {
8009fb3fba1SJagannathan Raman         error_setg(errp, "vfu: Failed to realize device %s- %s",
8019fb3fba1SJagannathan Raman                    o->device, strerror(errno));
8029fb3fba1SJagannathan Raman         goto fail;
8039fb3fba1SJagannathan Raman     }
8049fb3fba1SJagannathan Raman 
8059fb3fba1SJagannathan Raman     o->vfu_poll_fd = vfu_get_poll_fd(o->vfu_ctx);
8069fb3fba1SJagannathan Raman     if (o->vfu_poll_fd < 0) {
8079fb3fba1SJagannathan Raman         error_setg(errp, "vfu: Failed to get poll fd %s", o->device);
8089fb3fba1SJagannathan Raman         goto fail;
8099fb3fba1SJagannathan Raman     }
8109fb3fba1SJagannathan Raman 
8119fb3fba1SJagannathan Raman     qemu_set_fd_handler(o->vfu_poll_fd, vfu_object_attach_ctx, NULL, o);
8129fb3fba1SJagannathan Raman 
813a6e8d6d9SJagannathan Raman     return;
814a6e8d6d9SJagannathan Raman 
815a6e8d6d9SJagannathan Raman fail:
816a6e8d6d9SJagannathan Raman     vfu_destroy_ctx(o->vfu_ctx);
817a6e8d6d9SJagannathan Raman     if (o->unplug_blocker && o->pci_dev) {
818a6e8d6d9SJagannathan Raman         qdev_del_unplug_blocker(DEVICE(o->pci_dev), o->unplug_blocker);
819a6e8d6d9SJagannathan Raman         error_free(o->unplug_blocker);
820a6e8d6d9SJagannathan Raman         o->unplug_blocker = NULL;
821a6e8d6d9SJagannathan Raman     }
822a6e8d6d9SJagannathan Raman     if (o->pci_dev) {
823*08cf3dc6SJagannathan Raman         vfu_object_restore_msi_cbs(o);
824*08cf3dc6SJagannathan Raman         o->pci_dev->irq_opaque = NULL;
825a6e8d6d9SJagannathan Raman         object_unref(OBJECT(o->pci_dev));
826a6e8d6d9SJagannathan Raman         o->pci_dev = NULL;
827a6e8d6d9SJagannathan Raman     }
828a6e8d6d9SJagannathan Raman     o->vfu_ctx = NULL;
8298f9a9259SJagannathan Raman }
8308f9a9259SJagannathan Raman 
8318f9a9259SJagannathan Raman static void vfu_object_init(Object *obj)
8328f9a9259SJagannathan Raman {
8338f9a9259SJagannathan Raman     VfuObjectClass *k = VFU_OBJECT_GET_CLASS(obj);
8348f9a9259SJagannathan Raman     VfuObject *o = VFU_OBJECT(obj);
8358f9a9259SJagannathan Raman 
8368f9a9259SJagannathan Raman     k->nr_devs++;
8378f9a9259SJagannathan Raman 
8388f9a9259SJagannathan Raman     if (!object_dynamic_cast(OBJECT(current_machine), TYPE_REMOTE_MACHINE)) {
8398f9a9259SJagannathan Raman         error_setg(&o->err, "vfu: %s only compatible with %s machine",
8408f9a9259SJagannathan Raman                    TYPE_VFU_OBJECT, TYPE_REMOTE_MACHINE);
8418f9a9259SJagannathan Raman         return;
8428f9a9259SJagannathan Raman     }
84387f7249fSJagannathan Raman 
84487f7249fSJagannathan Raman     if (!phase_check(PHASE_MACHINE_READY)) {
84587f7249fSJagannathan Raman         o->machine_done.notify = vfu_object_machine_done;
84687f7249fSJagannathan Raman         qemu_add_machine_init_done_notifier(&o->machine_done);
84787f7249fSJagannathan Raman     }
84887f7249fSJagannathan Raman 
8499fb3fba1SJagannathan Raman     o->vfu_poll_fd = -1;
8508f9a9259SJagannathan Raman }
8518f9a9259SJagannathan Raman 
8528f9a9259SJagannathan Raman static void vfu_object_finalize(Object *obj)
8538f9a9259SJagannathan Raman {
8548f9a9259SJagannathan Raman     VfuObjectClass *k = VFU_OBJECT_GET_CLASS(obj);
8558f9a9259SJagannathan Raman     VfuObject *o = VFU_OBJECT(obj);
8568f9a9259SJagannathan Raman 
8578f9a9259SJagannathan Raman     k->nr_devs--;
8588f9a9259SJagannathan Raman 
8598f9a9259SJagannathan Raman     qapi_free_SocketAddress(o->socket);
8608f9a9259SJagannathan Raman 
8618f9a9259SJagannathan Raman     o->socket = NULL;
8628f9a9259SJagannathan Raman 
8639fb3fba1SJagannathan Raman     if (o->vfu_poll_fd != -1) {
8649fb3fba1SJagannathan Raman         qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL);
8659fb3fba1SJagannathan Raman         o->vfu_poll_fd = -1;
8669fb3fba1SJagannathan Raman     }
8679fb3fba1SJagannathan Raman 
86887f7249fSJagannathan Raman     if (o->vfu_ctx) {
86987f7249fSJagannathan Raman         vfu_destroy_ctx(o->vfu_ctx);
87087f7249fSJagannathan Raman         o->vfu_ctx = NULL;
87187f7249fSJagannathan Raman     }
87287f7249fSJagannathan Raman 
8738f9a9259SJagannathan Raman     g_free(o->device);
8748f9a9259SJagannathan Raman 
8758f9a9259SJagannathan Raman     o->device = NULL;
8768f9a9259SJagannathan Raman 
877a6e8d6d9SJagannathan Raman     if (o->unplug_blocker && o->pci_dev) {
878a6e8d6d9SJagannathan Raman         qdev_del_unplug_blocker(DEVICE(o->pci_dev), o->unplug_blocker);
879a6e8d6d9SJagannathan Raman         error_free(o->unplug_blocker);
880a6e8d6d9SJagannathan Raman         o->unplug_blocker = NULL;
881a6e8d6d9SJagannathan Raman     }
882a6e8d6d9SJagannathan Raman 
883a6e8d6d9SJagannathan Raman     if (o->pci_dev) {
884*08cf3dc6SJagannathan Raman         vfu_object_restore_msi_cbs(o);
885*08cf3dc6SJagannathan Raman         o->pci_dev->irq_opaque = NULL;
886a6e8d6d9SJagannathan Raman         object_unref(OBJECT(o->pci_dev));
887a6e8d6d9SJagannathan Raman         o->pci_dev = NULL;
888a6e8d6d9SJagannathan Raman     }
889a6e8d6d9SJagannathan Raman 
8908f9a9259SJagannathan Raman     if (!k->nr_devs && vfu_object_auto_shutdown()) {
8918f9a9259SJagannathan Raman         qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
8928f9a9259SJagannathan Raman     }
89387f7249fSJagannathan Raman 
89487f7249fSJagannathan Raman     if (o->machine_done.notify) {
89587f7249fSJagannathan Raman         qemu_remove_machine_init_done_notifier(&o->machine_done);
89687f7249fSJagannathan Raman         o->machine_done.notify = NULL;
89787f7249fSJagannathan Raman     }
8988f9a9259SJagannathan Raman }
8998f9a9259SJagannathan Raman 
9008f9a9259SJagannathan Raman static void vfu_object_class_init(ObjectClass *klass, void *data)
9018f9a9259SJagannathan Raman {
9028f9a9259SJagannathan Raman     VfuObjectClass *k = VFU_OBJECT_CLASS(klass);
9038f9a9259SJagannathan Raman 
9048f9a9259SJagannathan Raman     k->nr_devs = 0;
9058f9a9259SJagannathan Raman 
9068f9a9259SJagannathan Raman     object_class_property_add(klass, "socket", "SocketAddress", NULL,
9078f9a9259SJagannathan Raman                               vfu_object_set_socket, NULL, NULL);
9088f9a9259SJagannathan Raman     object_class_property_set_description(klass, "socket",
9098f9a9259SJagannathan Raman                                           "SocketAddress "
9108f9a9259SJagannathan Raman                                           "(ex: type=unix,path=/tmp/sock). "
9118f9a9259SJagannathan Raman                                           "Only UNIX is presently supported");
9128f9a9259SJagannathan Raman     object_class_property_add_str(klass, "device", NULL,
9138f9a9259SJagannathan Raman                                   vfu_object_set_device);
9148f9a9259SJagannathan Raman     object_class_property_set_description(klass, "device",
9158f9a9259SJagannathan Raman                                           "device ID - only PCI devices "
9168f9a9259SJagannathan Raman                                           "are presently supported");
9178f9a9259SJagannathan Raman }
9188f9a9259SJagannathan Raman 
9198f9a9259SJagannathan Raman static const TypeInfo vfu_object_info = {
9208f9a9259SJagannathan Raman     .name = TYPE_VFU_OBJECT,
9218f9a9259SJagannathan Raman     .parent = TYPE_OBJECT,
9228f9a9259SJagannathan Raman     .instance_size = sizeof(VfuObject),
9238f9a9259SJagannathan Raman     .instance_init = vfu_object_init,
9248f9a9259SJagannathan Raman     .instance_finalize = vfu_object_finalize,
9258f9a9259SJagannathan Raman     .class_size = sizeof(VfuObjectClass),
9268f9a9259SJagannathan Raman     .class_init = vfu_object_class_init,
9278f9a9259SJagannathan Raman     .interfaces = (InterfaceInfo[]) {
9288f9a9259SJagannathan Raman         { TYPE_USER_CREATABLE },
9298f9a9259SJagannathan Raman         { }
9308f9a9259SJagannathan Raman     }
9318f9a9259SJagannathan Raman };
9328f9a9259SJagannathan Raman 
9338f9a9259SJagannathan Raman static void vfu_register_types(void)
9348f9a9259SJagannathan Raman {
9358f9a9259SJagannathan Raman     type_register_static(&vfu_object_info);
9368f9a9259SJagannathan Raman }
9378f9a9259SJagannathan Raman 
9388f9a9259SJagannathan Raman type_init(vfu_register_types);
939