1cb039ef3SIlya Maximets /* 2cb039ef3SIlya Maximets * AF_XDP network backend. 3cb039ef3SIlya Maximets * 4cb039ef3SIlya Maximets * Copyright (c) 2023 Red Hat, Inc. 5cb039ef3SIlya Maximets * 6cb039ef3SIlya Maximets * Authors: 7cb039ef3SIlya Maximets * Ilya Maximets <i.maximets@ovn.org> 8cb039ef3SIlya Maximets * 9cb039ef3SIlya Maximets * This work is licensed under the terms of the GNU GPL, version 2 or later. 10cb039ef3SIlya Maximets * See the COPYING file in the top-level directory. 11cb039ef3SIlya Maximets */ 12cb039ef3SIlya Maximets 13cb039ef3SIlya Maximets 14cb039ef3SIlya Maximets #include "qemu/osdep.h" 15cb039ef3SIlya Maximets #include <bpf/bpf.h> 16cb039ef3SIlya Maximets #include <linux/if_link.h> 17cb039ef3SIlya Maximets #include <linux/if_xdp.h> 18cb039ef3SIlya Maximets #include <net/if.h> 19cb039ef3SIlya Maximets #include <xdp/xsk.h> 20cb039ef3SIlya Maximets 21cb039ef3SIlya Maximets #include "clients.h" 22cb039ef3SIlya Maximets #include "monitor/monitor.h" 23cb039ef3SIlya Maximets #include "net/net.h" 24cb039ef3SIlya Maximets #include "qapi/error.h" 25cb039ef3SIlya Maximets #include "qemu/cutils.h" 26cb039ef3SIlya Maximets #include "qemu/error-report.h" 27cb039ef3SIlya Maximets #include "qemu/iov.h" 28cb039ef3SIlya Maximets #include "qemu/main-loop.h" 29cb039ef3SIlya Maximets #include "qemu/memalign.h" 30cb039ef3SIlya Maximets 31cb039ef3SIlya Maximets 32cb039ef3SIlya Maximets typedef struct AFXDPState { 33cb039ef3SIlya Maximets NetClientState nc; 34cb039ef3SIlya Maximets 35cb039ef3SIlya Maximets struct xsk_socket *xsk; 36cb039ef3SIlya Maximets struct xsk_ring_cons rx; 37cb039ef3SIlya Maximets struct xsk_ring_prod tx; 38cb039ef3SIlya Maximets struct xsk_ring_cons cq; 39cb039ef3SIlya Maximets struct xsk_ring_prod fq; 40cb039ef3SIlya Maximets 41cb039ef3SIlya Maximets char ifname[IFNAMSIZ]; 42cb039ef3SIlya Maximets int ifindex; 43cb039ef3SIlya Maximets bool read_poll; 44cb039ef3SIlya Maximets bool write_poll; 45cb039ef3SIlya Maximets uint32_t outstanding_tx; 46cb039ef3SIlya Maximets 47cb039ef3SIlya Maximets uint64_t *pool; 48cb039ef3SIlya Maximets uint32_t n_pool; 49cb039ef3SIlya Maximets char *buffer; 50cb039ef3SIlya Maximets struct xsk_umem *umem; 51cb039ef3SIlya Maximets 52cb039ef3SIlya Maximets uint32_t n_queues; 53cb039ef3SIlya Maximets uint32_t xdp_flags; 54cb039ef3SIlya Maximets bool inhibit; 55cb039ef3SIlya Maximets } AFXDPState; 56cb039ef3SIlya Maximets 57cb039ef3SIlya Maximets #define AF_XDP_BATCH_SIZE 64 58cb039ef3SIlya Maximets 59cb039ef3SIlya Maximets static void af_xdp_send(void *opaque); 60cb039ef3SIlya Maximets static void af_xdp_writable(void *opaque); 61cb039ef3SIlya Maximets 62cb039ef3SIlya Maximets /* Set the event-loop handlers for the af-xdp backend. */ 63cb039ef3SIlya Maximets static void af_xdp_update_fd_handler(AFXDPState *s) 64cb039ef3SIlya Maximets { 65cb039ef3SIlya Maximets qemu_set_fd_handler(xsk_socket__fd(s->xsk), 66cb039ef3SIlya Maximets s->read_poll ? af_xdp_send : NULL, 67cb039ef3SIlya Maximets s->write_poll ? af_xdp_writable : NULL, 68cb039ef3SIlya Maximets s); 69cb039ef3SIlya Maximets } 70cb039ef3SIlya Maximets 71cb039ef3SIlya Maximets /* Update the read handler. */ 72cb039ef3SIlya Maximets static void af_xdp_read_poll(AFXDPState *s, bool enable) 73cb039ef3SIlya Maximets { 74cb039ef3SIlya Maximets if (s->read_poll != enable) { 75cb039ef3SIlya Maximets s->read_poll = enable; 76cb039ef3SIlya Maximets af_xdp_update_fd_handler(s); 77cb039ef3SIlya Maximets } 78cb039ef3SIlya Maximets } 79cb039ef3SIlya Maximets 80cb039ef3SIlya Maximets /* Update the write handler. */ 81cb039ef3SIlya Maximets static void af_xdp_write_poll(AFXDPState *s, bool enable) 82cb039ef3SIlya Maximets { 83cb039ef3SIlya Maximets if (s->write_poll != enable) { 84cb039ef3SIlya Maximets s->write_poll = enable; 85cb039ef3SIlya Maximets af_xdp_update_fd_handler(s); 86cb039ef3SIlya Maximets } 87cb039ef3SIlya Maximets } 88cb039ef3SIlya Maximets 89cb039ef3SIlya Maximets static void af_xdp_poll(NetClientState *nc, bool enable) 90cb039ef3SIlya Maximets { 91cb039ef3SIlya Maximets AFXDPState *s = DO_UPCAST(AFXDPState, nc, nc); 92cb039ef3SIlya Maximets 93cb039ef3SIlya Maximets if (s->read_poll != enable || s->write_poll != enable) { 94cb039ef3SIlya Maximets s->write_poll = enable; 95cb039ef3SIlya Maximets s->read_poll = enable; 96cb039ef3SIlya Maximets af_xdp_update_fd_handler(s); 97cb039ef3SIlya Maximets } 98cb039ef3SIlya Maximets } 99cb039ef3SIlya Maximets 100cb039ef3SIlya Maximets static void af_xdp_complete_tx(AFXDPState *s) 101cb039ef3SIlya Maximets { 102cb039ef3SIlya Maximets uint32_t idx = 0; 103cb039ef3SIlya Maximets uint32_t done, i; 104cb039ef3SIlya Maximets uint64_t *addr; 105cb039ef3SIlya Maximets 106cb039ef3SIlya Maximets done = xsk_ring_cons__peek(&s->cq, XSK_RING_CONS__DEFAULT_NUM_DESCS, &idx); 107cb039ef3SIlya Maximets 108cb039ef3SIlya Maximets for (i = 0; i < done; i++) { 109cb039ef3SIlya Maximets addr = (void *) xsk_ring_cons__comp_addr(&s->cq, idx++); 110cb039ef3SIlya Maximets s->pool[s->n_pool++] = *addr; 111cb039ef3SIlya Maximets s->outstanding_tx--; 112cb039ef3SIlya Maximets } 113cb039ef3SIlya Maximets 114cb039ef3SIlya Maximets if (done) { 115cb039ef3SIlya Maximets xsk_ring_cons__release(&s->cq, done); 116cb039ef3SIlya Maximets } 117cb039ef3SIlya Maximets } 118cb039ef3SIlya Maximets 119cb039ef3SIlya Maximets /* 120cb039ef3SIlya Maximets * The fd_write() callback, invoked if the fd is marked as writable 121cb039ef3SIlya Maximets * after a poll. 122cb039ef3SIlya Maximets */ 123cb039ef3SIlya Maximets static void af_xdp_writable(void *opaque) 124cb039ef3SIlya Maximets { 125cb039ef3SIlya Maximets AFXDPState *s = opaque; 126cb039ef3SIlya Maximets 127cb039ef3SIlya Maximets /* Try to recover buffers that are already sent. */ 128cb039ef3SIlya Maximets af_xdp_complete_tx(s); 129cb039ef3SIlya Maximets 130cb039ef3SIlya Maximets /* 131cb039ef3SIlya Maximets * Unregister the handler, unless we still have packets to transmit 132cb039ef3SIlya Maximets * and kernel needs a wake up. 133cb039ef3SIlya Maximets */ 134cb039ef3SIlya Maximets if (!s->outstanding_tx || !xsk_ring_prod__needs_wakeup(&s->tx)) { 135cb039ef3SIlya Maximets af_xdp_write_poll(s, false); 136cb039ef3SIlya Maximets } 137cb039ef3SIlya Maximets 138cb039ef3SIlya Maximets /* Flush any buffered packets. */ 139cb039ef3SIlya Maximets qemu_flush_queued_packets(&s->nc); 140cb039ef3SIlya Maximets } 141cb039ef3SIlya Maximets 142cb039ef3SIlya Maximets static ssize_t af_xdp_receive(NetClientState *nc, 143cb039ef3SIlya Maximets const uint8_t *buf, size_t size) 144cb039ef3SIlya Maximets { 145cb039ef3SIlya Maximets AFXDPState *s = DO_UPCAST(AFXDPState, nc, nc); 146cb039ef3SIlya Maximets struct xdp_desc *desc; 147cb039ef3SIlya Maximets uint32_t idx; 148cb039ef3SIlya Maximets void *data; 149cb039ef3SIlya Maximets 150cb039ef3SIlya Maximets /* Try to recover buffers that are already sent. */ 151cb039ef3SIlya Maximets af_xdp_complete_tx(s); 152cb039ef3SIlya Maximets 153cb039ef3SIlya Maximets if (size > XSK_UMEM__DEFAULT_FRAME_SIZE) { 154cb039ef3SIlya Maximets /* We can't transmit packet this size... */ 155cb039ef3SIlya Maximets return size; 156cb039ef3SIlya Maximets } 157cb039ef3SIlya Maximets 158cb039ef3SIlya Maximets if (!s->n_pool || !xsk_ring_prod__reserve(&s->tx, 1, &idx)) { 159cb039ef3SIlya Maximets /* 160cb039ef3SIlya Maximets * Out of buffers or space in tx ring. Poll until we can write. 161cb039ef3SIlya Maximets * This will also kick the Tx, if it was waiting on CQ. 162cb039ef3SIlya Maximets */ 163cb039ef3SIlya Maximets af_xdp_write_poll(s, true); 164cb039ef3SIlya Maximets return 0; 165cb039ef3SIlya Maximets } 166cb039ef3SIlya Maximets 167cb039ef3SIlya Maximets desc = xsk_ring_prod__tx_desc(&s->tx, idx); 168cb039ef3SIlya Maximets desc->addr = s->pool[--s->n_pool]; 169cb039ef3SIlya Maximets desc->len = size; 170cb039ef3SIlya Maximets 171cb039ef3SIlya Maximets data = xsk_umem__get_data(s->buffer, desc->addr); 172cb039ef3SIlya Maximets memcpy(data, buf, size); 173cb039ef3SIlya Maximets 174cb039ef3SIlya Maximets xsk_ring_prod__submit(&s->tx, 1); 175cb039ef3SIlya Maximets s->outstanding_tx++; 176cb039ef3SIlya Maximets 177cb039ef3SIlya Maximets if (xsk_ring_prod__needs_wakeup(&s->tx)) { 178cb039ef3SIlya Maximets af_xdp_write_poll(s, true); 179cb039ef3SIlya Maximets } 180cb039ef3SIlya Maximets 181cb039ef3SIlya Maximets return size; 182cb039ef3SIlya Maximets } 183cb039ef3SIlya Maximets 184cb039ef3SIlya Maximets /* 185cb039ef3SIlya Maximets * Complete a previous send (backend --> guest) and enable the 186cb039ef3SIlya Maximets * fd_read callback. 187cb039ef3SIlya Maximets */ 188cb039ef3SIlya Maximets static void af_xdp_send_completed(NetClientState *nc, ssize_t len) 189cb039ef3SIlya Maximets { 190cb039ef3SIlya Maximets AFXDPState *s = DO_UPCAST(AFXDPState, nc, nc); 191cb039ef3SIlya Maximets 192cb039ef3SIlya Maximets af_xdp_read_poll(s, true); 193cb039ef3SIlya Maximets } 194cb039ef3SIlya Maximets 195cb039ef3SIlya Maximets static void af_xdp_fq_refill(AFXDPState *s, uint32_t n) 196cb039ef3SIlya Maximets { 197cb039ef3SIlya Maximets uint32_t i, idx = 0; 198cb039ef3SIlya Maximets 199cb039ef3SIlya Maximets /* Leave one packet for Tx, just in case. */ 200cb039ef3SIlya Maximets if (s->n_pool < n + 1) { 201cb039ef3SIlya Maximets n = s->n_pool; 202cb039ef3SIlya Maximets } 203cb039ef3SIlya Maximets 204cb039ef3SIlya Maximets if (!n || !xsk_ring_prod__reserve(&s->fq, n, &idx)) { 205cb039ef3SIlya Maximets return; 206cb039ef3SIlya Maximets } 207cb039ef3SIlya Maximets 208cb039ef3SIlya Maximets for (i = 0; i < n; i++) { 209cb039ef3SIlya Maximets *xsk_ring_prod__fill_addr(&s->fq, idx++) = s->pool[--s->n_pool]; 210cb039ef3SIlya Maximets } 211cb039ef3SIlya Maximets xsk_ring_prod__submit(&s->fq, n); 212cb039ef3SIlya Maximets 213cb039ef3SIlya Maximets if (xsk_ring_prod__needs_wakeup(&s->fq)) { 214cb039ef3SIlya Maximets /* Receive was blocked by not having enough buffers. Wake it up. */ 215cb039ef3SIlya Maximets af_xdp_read_poll(s, true); 216cb039ef3SIlya Maximets } 217cb039ef3SIlya Maximets } 218cb039ef3SIlya Maximets 219cb039ef3SIlya Maximets static void af_xdp_send(void *opaque) 220cb039ef3SIlya Maximets { 221cb039ef3SIlya Maximets uint32_t i, n_rx, idx = 0; 222cb039ef3SIlya Maximets AFXDPState *s = opaque; 223cb039ef3SIlya Maximets 224cb039ef3SIlya Maximets n_rx = xsk_ring_cons__peek(&s->rx, AF_XDP_BATCH_SIZE, &idx); 225cb039ef3SIlya Maximets if (!n_rx) { 226cb039ef3SIlya Maximets return; 227cb039ef3SIlya Maximets } 228cb039ef3SIlya Maximets 229cb039ef3SIlya Maximets for (i = 0; i < n_rx; i++) { 230cb039ef3SIlya Maximets const struct xdp_desc *desc; 231cb039ef3SIlya Maximets struct iovec iov; 232cb039ef3SIlya Maximets 233cb039ef3SIlya Maximets desc = xsk_ring_cons__rx_desc(&s->rx, idx++); 234cb039ef3SIlya Maximets 235cb039ef3SIlya Maximets iov.iov_base = xsk_umem__get_data(s->buffer, desc->addr); 236cb039ef3SIlya Maximets iov.iov_len = desc->len; 237cb039ef3SIlya Maximets 238cb039ef3SIlya Maximets s->pool[s->n_pool++] = desc->addr; 239cb039ef3SIlya Maximets 240cb039ef3SIlya Maximets if (!qemu_sendv_packet_async(&s->nc, &iov, 1, 241cb039ef3SIlya Maximets af_xdp_send_completed)) { 242cb039ef3SIlya Maximets /* 243cb039ef3SIlya Maximets * The peer does not receive anymore. Packet is queued, stop 244cb039ef3SIlya Maximets * reading from the backend until af_xdp_send_completed(). 245cb039ef3SIlya Maximets */ 246cb039ef3SIlya Maximets af_xdp_read_poll(s, false); 247cb039ef3SIlya Maximets 248cb039ef3SIlya Maximets /* Return unused descriptors to not break the ring cache. */ 249cb039ef3SIlya Maximets xsk_ring_cons__cancel(&s->rx, n_rx - i - 1); 250cb039ef3SIlya Maximets n_rx = i + 1; 251cb039ef3SIlya Maximets break; 252cb039ef3SIlya Maximets } 253cb039ef3SIlya Maximets } 254cb039ef3SIlya Maximets 255cb039ef3SIlya Maximets /* Release actually sent descriptors and try to re-fill. */ 256cb039ef3SIlya Maximets xsk_ring_cons__release(&s->rx, n_rx); 257cb039ef3SIlya Maximets af_xdp_fq_refill(s, AF_XDP_BATCH_SIZE); 258cb039ef3SIlya Maximets } 259cb039ef3SIlya Maximets 260cb039ef3SIlya Maximets /* Flush and close. */ 261cb039ef3SIlya Maximets static void af_xdp_cleanup(NetClientState *nc) 262cb039ef3SIlya Maximets { 263cb039ef3SIlya Maximets AFXDPState *s = DO_UPCAST(AFXDPState, nc, nc); 264cb039ef3SIlya Maximets 265cb039ef3SIlya Maximets qemu_purge_queued_packets(nc); 266cb039ef3SIlya Maximets 267cb039ef3SIlya Maximets af_xdp_poll(nc, false); 268cb039ef3SIlya Maximets 269cb039ef3SIlya Maximets xsk_socket__delete(s->xsk); 270cb039ef3SIlya Maximets s->xsk = NULL; 271cb039ef3SIlya Maximets g_free(s->pool); 272cb039ef3SIlya Maximets s->pool = NULL; 273cb039ef3SIlya Maximets xsk_umem__delete(s->umem); 274cb039ef3SIlya Maximets s->umem = NULL; 275cb039ef3SIlya Maximets qemu_vfree(s->buffer); 276cb039ef3SIlya Maximets s->buffer = NULL; 277cb039ef3SIlya Maximets 278cb039ef3SIlya Maximets /* Remove the program if it's the last open queue. */ 279cb039ef3SIlya Maximets if (!s->inhibit && nc->queue_index == s->n_queues - 1 && s->xdp_flags 280cb039ef3SIlya Maximets && bpf_xdp_detach(s->ifindex, s->xdp_flags, NULL) != 0) { 281cb039ef3SIlya Maximets fprintf(stderr, 282cb039ef3SIlya Maximets "af-xdp: unable to remove XDP program from '%s', ifindex: %d\n", 283cb039ef3SIlya Maximets s->ifname, s->ifindex); 284cb039ef3SIlya Maximets } 285cb039ef3SIlya Maximets } 286cb039ef3SIlya Maximets 287cb039ef3SIlya Maximets static int af_xdp_umem_create(AFXDPState *s, int sock_fd, Error **errp) 288cb039ef3SIlya Maximets { 289cb039ef3SIlya Maximets struct xsk_umem_config config = { 290cb039ef3SIlya Maximets .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS, 291cb039ef3SIlya Maximets .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS, 292cb039ef3SIlya Maximets .frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE, 293cb039ef3SIlya Maximets .frame_headroom = 0, 294cb039ef3SIlya Maximets }; 295cb039ef3SIlya Maximets uint64_t n_descs; 296cb039ef3SIlya Maximets uint64_t size; 297cb039ef3SIlya Maximets int64_t i; 298cb039ef3SIlya Maximets int ret; 299cb039ef3SIlya Maximets 300cb039ef3SIlya Maximets /* Number of descriptors if all 4 queues (rx, tx, cq, fq) are full. */ 301cb039ef3SIlya Maximets n_descs = (XSK_RING_PROD__DEFAULT_NUM_DESCS 302cb039ef3SIlya Maximets + XSK_RING_CONS__DEFAULT_NUM_DESCS) * 2; 303cb039ef3SIlya Maximets size = n_descs * XSK_UMEM__DEFAULT_FRAME_SIZE; 304cb039ef3SIlya Maximets 305cb039ef3SIlya Maximets s->buffer = qemu_memalign(qemu_real_host_page_size(), size); 306cb039ef3SIlya Maximets memset(s->buffer, 0, size); 307cb039ef3SIlya Maximets 308cb039ef3SIlya Maximets if (sock_fd < 0) { 309cb039ef3SIlya Maximets ret = xsk_umem__create(&s->umem, s->buffer, size, 310cb039ef3SIlya Maximets &s->fq, &s->cq, &config); 311cb039ef3SIlya Maximets } else { 312cb039ef3SIlya Maximets ret = xsk_umem__create_with_fd(&s->umem, sock_fd, s->buffer, size, 313cb039ef3SIlya Maximets &s->fq, &s->cq, &config); 314cb039ef3SIlya Maximets } 315cb039ef3SIlya Maximets 316cb039ef3SIlya Maximets if (ret) { 317cb039ef3SIlya Maximets qemu_vfree(s->buffer); 318cb039ef3SIlya Maximets error_setg_errno(errp, errno, 319cb039ef3SIlya Maximets "failed to create umem for %s queue_index: %d", 320cb039ef3SIlya Maximets s->ifname, s->nc.queue_index); 321cb039ef3SIlya Maximets return -1; 322cb039ef3SIlya Maximets } 323cb039ef3SIlya Maximets 324cb039ef3SIlya Maximets s->pool = g_new(uint64_t, n_descs); 325cb039ef3SIlya Maximets /* Fill the pool in the opposite order, because it's a LIFO queue. */ 326cb039ef3SIlya Maximets for (i = n_descs; i >= 0; i--) { 327cb039ef3SIlya Maximets s->pool[i] = i * XSK_UMEM__DEFAULT_FRAME_SIZE; 328cb039ef3SIlya Maximets } 329cb039ef3SIlya Maximets s->n_pool = n_descs; 330cb039ef3SIlya Maximets 331cb039ef3SIlya Maximets af_xdp_fq_refill(s, XSK_RING_PROD__DEFAULT_NUM_DESCS); 332cb039ef3SIlya Maximets 333cb039ef3SIlya Maximets return 0; 334cb039ef3SIlya Maximets } 335cb039ef3SIlya Maximets 336cb039ef3SIlya Maximets static int af_xdp_socket_create(AFXDPState *s, 337cb039ef3SIlya Maximets const NetdevAFXDPOptions *opts, Error **errp) 338cb039ef3SIlya Maximets { 339cb039ef3SIlya Maximets struct xsk_socket_config cfg = { 340cb039ef3SIlya Maximets .rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS, 341cb039ef3SIlya Maximets .tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS, 342cb039ef3SIlya Maximets .libxdp_flags = 0, 343cb039ef3SIlya Maximets .bind_flags = XDP_USE_NEED_WAKEUP, 344cb039ef3SIlya Maximets .xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST, 345cb039ef3SIlya Maximets }; 346cb039ef3SIlya Maximets int queue_id, error = 0; 347cb039ef3SIlya Maximets 348cb039ef3SIlya Maximets s->inhibit = opts->has_inhibit && opts->inhibit; 349cb039ef3SIlya Maximets if (s->inhibit) { 350cb039ef3SIlya Maximets cfg.libxdp_flags |= XSK_LIBXDP_FLAGS__INHIBIT_PROG_LOAD; 351cb039ef3SIlya Maximets } 352cb039ef3SIlya Maximets 353cb039ef3SIlya Maximets if (opts->has_force_copy && opts->force_copy) { 354cb039ef3SIlya Maximets cfg.bind_flags |= XDP_COPY; 355cb039ef3SIlya Maximets } 356cb039ef3SIlya Maximets 357cb039ef3SIlya Maximets queue_id = s->nc.queue_index; 358cb039ef3SIlya Maximets if (opts->has_start_queue && opts->start_queue > 0) { 359cb039ef3SIlya Maximets queue_id += opts->start_queue; 360cb039ef3SIlya Maximets } 361cb039ef3SIlya Maximets 362cb039ef3SIlya Maximets if (opts->has_mode) { 363cb039ef3SIlya Maximets /* Specific mode requested. */ 364cb039ef3SIlya Maximets cfg.xdp_flags |= (opts->mode == AFXDP_MODE_NATIVE) 365cb039ef3SIlya Maximets ? XDP_FLAGS_DRV_MODE : XDP_FLAGS_SKB_MODE; 366cb039ef3SIlya Maximets if (xsk_socket__create(&s->xsk, s->ifname, queue_id, 367cb039ef3SIlya Maximets s->umem, &s->rx, &s->tx, &cfg)) { 368cb039ef3SIlya Maximets error = errno; 369cb039ef3SIlya Maximets } 370cb039ef3SIlya Maximets } else { 371cb039ef3SIlya Maximets /* No mode requested, try native first. */ 372cb039ef3SIlya Maximets cfg.xdp_flags |= XDP_FLAGS_DRV_MODE; 373cb039ef3SIlya Maximets 374cb039ef3SIlya Maximets if (xsk_socket__create(&s->xsk, s->ifname, queue_id, 375cb039ef3SIlya Maximets s->umem, &s->rx, &s->tx, &cfg)) { 376cb039ef3SIlya Maximets /* Can't use native mode, try skb. */ 377cb039ef3SIlya Maximets cfg.xdp_flags &= ~XDP_FLAGS_DRV_MODE; 378cb039ef3SIlya Maximets cfg.xdp_flags |= XDP_FLAGS_SKB_MODE; 379cb039ef3SIlya Maximets 380cb039ef3SIlya Maximets if (xsk_socket__create(&s->xsk, s->ifname, queue_id, 381cb039ef3SIlya Maximets s->umem, &s->rx, &s->tx, &cfg)) { 382cb039ef3SIlya Maximets error = errno; 383cb039ef3SIlya Maximets } 384cb039ef3SIlya Maximets } 385cb039ef3SIlya Maximets } 386cb039ef3SIlya Maximets 387cb039ef3SIlya Maximets if (error) { 388cb039ef3SIlya Maximets error_setg_errno(errp, error, 389cb039ef3SIlya Maximets "failed to create AF_XDP socket for %s queue_id: %d", 390cb039ef3SIlya Maximets s->ifname, queue_id); 391cb039ef3SIlya Maximets return -1; 392cb039ef3SIlya Maximets } 393cb039ef3SIlya Maximets 394cb039ef3SIlya Maximets s->xdp_flags = cfg.xdp_flags; 395cb039ef3SIlya Maximets 396cb039ef3SIlya Maximets return 0; 397cb039ef3SIlya Maximets } 398cb039ef3SIlya Maximets 399cb039ef3SIlya Maximets /* NetClientInfo methods. */ 400cb039ef3SIlya Maximets static NetClientInfo net_af_xdp_info = { 401cb039ef3SIlya Maximets .type = NET_CLIENT_DRIVER_AF_XDP, 402cb039ef3SIlya Maximets .size = sizeof(AFXDPState), 403cb039ef3SIlya Maximets .receive = af_xdp_receive, 404cb039ef3SIlya Maximets .poll = af_xdp_poll, 405cb039ef3SIlya Maximets .cleanup = af_xdp_cleanup, 406cb039ef3SIlya Maximets }; 407cb039ef3SIlya Maximets 408cb039ef3SIlya Maximets static int *parse_socket_fds(const char *sock_fds_str, 409cb039ef3SIlya Maximets int64_t n_expected, Error **errp) 410cb039ef3SIlya Maximets { 411cb039ef3SIlya Maximets gchar **substrings = g_strsplit(sock_fds_str, ":", -1); 412cb039ef3SIlya Maximets int64_t i, n_sock_fds = g_strv_length(substrings); 413cb039ef3SIlya Maximets int *sock_fds = NULL; 414cb039ef3SIlya Maximets 415cb039ef3SIlya Maximets if (n_sock_fds != n_expected) { 416cb039ef3SIlya Maximets error_setg(errp, "expected %"PRIi64" socket fds, got %"PRIi64, 417cb039ef3SIlya Maximets n_expected, n_sock_fds); 418cb039ef3SIlya Maximets goto exit; 419cb039ef3SIlya Maximets } 420cb039ef3SIlya Maximets 421cb039ef3SIlya Maximets sock_fds = g_new(int, n_sock_fds); 422cb039ef3SIlya Maximets 423cb039ef3SIlya Maximets for (i = 0; i < n_sock_fds; i++) { 424cb039ef3SIlya Maximets sock_fds[i] = monitor_fd_param(monitor_cur(), substrings[i], errp); 425cb039ef3SIlya Maximets if (sock_fds[i] < 0) { 426cb039ef3SIlya Maximets g_free(sock_fds); 427cb039ef3SIlya Maximets sock_fds = NULL; 428cb039ef3SIlya Maximets goto exit; 429cb039ef3SIlya Maximets } 430cb039ef3SIlya Maximets } 431cb039ef3SIlya Maximets 432cb039ef3SIlya Maximets exit: 433cb039ef3SIlya Maximets g_strfreev(substrings); 434cb039ef3SIlya Maximets return sock_fds; 435cb039ef3SIlya Maximets } 436cb039ef3SIlya Maximets 437cb039ef3SIlya Maximets /* 438cb039ef3SIlya Maximets * The exported init function. 439cb039ef3SIlya Maximets * 440cb039ef3SIlya Maximets * ... -netdev af-xdp,ifname="..." 441cb039ef3SIlya Maximets */ 442cb039ef3SIlya Maximets int net_init_af_xdp(const Netdev *netdev, 443cb039ef3SIlya Maximets const char *name, NetClientState *peer, Error **errp) 444cb039ef3SIlya Maximets { 445cb039ef3SIlya Maximets const NetdevAFXDPOptions *opts = &netdev->u.af_xdp; 446cb039ef3SIlya Maximets NetClientState *nc, *nc0 = NULL; 447cb039ef3SIlya Maximets unsigned int ifindex; 448cb039ef3SIlya Maximets uint32_t prog_id = 0; 449*bed150beSPeter Maydell g_autofree int *sock_fds = NULL; 450cb039ef3SIlya Maximets int64_t i, queues; 451cb039ef3SIlya Maximets Error *err = NULL; 452cb039ef3SIlya Maximets AFXDPState *s; 453cb039ef3SIlya Maximets 454cb039ef3SIlya Maximets ifindex = if_nametoindex(opts->ifname); 455cb039ef3SIlya Maximets if (!ifindex) { 456cb039ef3SIlya Maximets error_setg_errno(errp, errno, "failed to get ifindex for '%s'", 457cb039ef3SIlya Maximets opts->ifname); 458cb039ef3SIlya Maximets return -1; 459cb039ef3SIlya Maximets } 460cb039ef3SIlya Maximets 461cb039ef3SIlya Maximets queues = opts->has_queues ? opts->queues : 1; 462cb039ef3SIlya Maximets if (queues < 1) { 463cb039ef3SIlya Maximets error_setg(errp, "invalid number of queues (%" PRIi64 ") for '%s'", 464cb039ef3SIlya Maximets queues, opts->ifname); 465cb039ef3SIlya Maximets return -1; 466cb039ef3SIlya Maximets } 467cb039ef3SIlya Maximets 468cb039ef3SIlya Maximets if ((opts->has_inhibit && opts->inhibit) != !!opts->sock_fds) { 469cb039ef3SIlya Maximets error_setg(errp, "'inhibit=on' requires 'sock-fds' and vice versa"); 470cb039ef3SIlya Maximets return -1; 471cb039ef3SIlya Maximets } 472cb039ef3SIlya Maximets 473cb039ef3SIlya Maximets if (opts->sock_fds) { 474cb039ef3SIlya Maximets sock_fds = parse_socket_fds(opts->sock_fds, queues, errp); 475cb039ef3SIlya Maximets if (!sock_fds) { 476cb039ef3SIlya Maximets return -1; 477cb039ef3SIlya Maximets } 478cb039ef3SIlya Maximets } 479cb039ef3SIlya Maximets 480cb039ef3SIlya Maximets for (i = 0; i < queues; i++) { 481cb039ef3SIlya Maximets nc = qemu_new_net_client(&net_af_xdp_info, peer, "af-xdp", name); 482cb039ef3SIlya Maximets qemu_set_info_str(nc, "af-xdp%"PRIi64" to %s", i, opts->ifname); 483cb039ef3SIlya Maximets nc->queue_index = i; 484cb039ef3SIlya Maximets 485cb039ef3SIlya Maximets if (!nc0) { 486cb039ef3SIlya Maximets nc0 = nc; 487cb039ef3SIlya Maximets } 488cb039ef3SIlya Maximets 489cb039ef3SIlya Maximets s = DO_UPCAST(AFXDPState, nc, nc); 490cb039ef3SIlya Maximets 491cb039ef3SIlya Maximets pstrcpy(s->ifname, sizeof(s->ifname), opts->ifname); 492cb039ef3SIlya Maximets s->ifindex = ifindex; 493cb039ef3SIlya Maximets s->n_queues = queues; 494cb039ef3SIlya Maximets 495cb039ef3SIlya Maximets if (af_xdp_umem_create(s, sock_fds ? sock_fds[i] : -1, errp) 496cb039ef3SIlya Maximets || af_xdp_socket_create(s, opts, errp)) { 497cb039ef3SIlya Maximets /* Make sure the XDP program will be removed. */ 498cb039ef3SIlya Maximets s->n_queues = i; 499cb039ef3SIlya Maximets error_propagate(errp, err); 500cb039ef3SIlya Maximets goto err; 501cb039ef3SIlya Maximets } 502cb039ef3SIlya Maximets } 503cb039ef3SIlya Maximets 504cb039ef3SIlya Maximets if (nc0) { 505cb039ef3SIlya Maximets s = DO_UPCAST(AFXDPState, nc, nc0); 506cb039ef3SIlya Maximets if (bpf_xdp_query_id(s->ifindex, s->xdp_flags, &prog_id) || !prog_id) { 507cb039ef3SIlya Maximets error_setg_errno(errp, errno, 508cb039ef3SIlya Maximets "no XDP program loaded on '%s', ifindex: %d", 509cb039ef3SIlya Maximets s->ifname, s->ifindex); 510cb039ef3SIlya Maximets goto err; 511cb039ef3SIlya Maximets } 512cb039ef3SIlya Maximets } 513cb039ef3SIlya Maximets 514cb039ef3SIlya Maximets af_xdp_read_poll(s, true); /* Initially only poll for reads. */ 515cb039ef3SIlya Maximets 516cb039ef3SIlya Maximets return 0; 517cb039ef3SIlya Maximets 518cb039ef3SIlya Maximets err: 519cb039ef3SIlya Maximets if (nc0) { 520cb039ef3SIlya Maximets qemu_del_net_client(nc0); 521cb039ef3SIlya Maximets } 522cb039ef3SIlya Maximets 523cb039ef3SIlya Maximets return -1; 524cb039ef3SIlya Maximets } 525