xref: /qemu/hw/net/xen_nic.c (revision b20c6b9e47772b9162ed194e7b2884afa6a354ab)
1 /*
2  *  xen paravirt network card backend
3  *
4  *  (c) Gerd Hoffmann <kraxel@redhat.com>
5  *
6  *  This program is free software; you can redistribute it and/or modify
7  *  it under the terms of the GNU General Public License as published by
8  *  the Free Software Foundation; under version 2 of the License.
9  *
10  *  This program is distributed in the hope that it will be useful,
11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  *  GNU General Public License for more details.
14  *
15  *  You should have received a copy of the GNU General Public License along
16  *  with this program; if not, see <http://www.gnu.org/licenses/>.
17  *
18  *  Contributions after 2012-01-13 are licensed under the terms of the
19  *  GNU GPL, version 2 or (at your option) any later version.
20  */
21 
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <stdarg.h>
25 #include <string.h>
26 #include <unistd.h>
27 #include <signal.h>
28 #include <inttypes.h>
29 #include <fcntl.h>
30 #include <errno.h>
31 #include <sys/socket.h>
32 #include <sys/ioctl.h>
33 #include <sys/types.h>
34 #include <sys/stat.h>
35 #include <sys/mman.h>
36 #include <sys/wait.h>
37 
38 #include "hw.h"
39 #include "net.h"
40 #include "net/checksum.h"
41 #include "net/util.h"
42 #include "qemu-char.h"
43 #include "xen_backend.h"
44 
45 #include <xen/io/netif.h>
46 
47 /* ------------------------------------------------------------- */
48 
/* Per-device state of one xen netif backend instance. */
struct XenNetDev {
    struct XenDevice      xendev;  /* must be first */
    char                  *mac;           /* MAC string read from the backend xenstore dir */
    int                   tx_work;        /* flagged by net_tx_response(): more tx requests pending */
    int                   tx_ring_ref;    /* grant reference of the shared tx ring page */
    int                   rx_ring_ref;    /* grant reference of the shared rx ring page */
    struct netif_tx_sring *txs;           /* mapped tx shared ring (NULL when disconnected) */
    struct netif_rx_sring *rxs;           /* mapped rx shared ring (NULL when disconnected) */
    netif_tx_back_ring_t  tx_ring;        /* backend-private view of the tx ring */
    netif_rx_back_ring_t  rx_ring;        /* backend-private view of the rx ring */
    NICConf               conf;
    NICState              *nic;           /* qemu net layer peer */
};
62 
63 /* ------------------------------------------------------------- */
64 
/*
 * Queue a status response for one consumed tx request and notify the
 * frontend if the ring macros say an event is needed.  If we have caught
 * up with the request consumer, re-check for requests that raced in and
 * flag them via tx_work so net_tx_packets() makes another pass.
 */
static void net_tx_response(struct XenNetDev *netdev, netif_tx_request_t *txp, int8_t st)
{
    RING_IDX i = netdev->tx_ring.rsp_prod_pvt;
    netif_tx_response_t *resp;
    int notify;

    resp = RING_GET_RESPONSE(&netdev->tx_ring, i);
    resp->id     = txp->id;
    resp->status = st;

#if 0
    /* Disabled: requests with NETTXF_extra_info are rejected in
     * net_tx_packets(), so there is never an extra slot to acknowledge. */
    if (txp->flags & NETTXF_extra_info) {
        RING_GET_RESPONSE(&netdev->tx_ring, ++i)->status = NETIF_RSP_NULL;
    }
#endif

    netdev->tx_ring.rsp_prod_pvt = ++i;
    RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netdev->tx_ring, notify);
    if (notify) {
        xen_be_send_notify(&netdev->xendev);
    }

    /* caught up with req_cons: final race-free check for more requests */
    if (i == netdev->tx_ring.req_cons) {
        int more_to_do;
        RING_FINAL_CHECK_FOR_REQUESTS(&netdev->tx_ring, more_to_do);
        if (more_to_do) {
            netdev->tx_work++;
        }
    }
}
95 
96 static void net_tx_error(struct XenNetDev *netdev, netif_tx_request_t *txp, RING_IDX end)
97 {
98 #if 0
99     /*
100      * Hmm, why netback fails everything in the ring?
101      * Should we do that even when not supporting SG and TSO?
102      */
103     RING_IDX cons = netdev->tx_ring.req_cons;
104 
105     do {
106         make_tx_response(netif, txp, NETIF_RSP_ERROR);
107         if (cons >= end) {
108             break;
109         }
110         txp = RING_GET_REQUEST(&netdev->tx_ring, cons++);
111     } while (1);
112     netdev->tx_ring.req_cons = cons;
113     netif_schedule_work(netif);
114     netif_put(netif);
115 #else
116     net_tx_response(netdev, txp, NETIF_RSP_ERROR);
117 #endif
118 }
119 
/*
 * Drain the tx ring: for every request, validate it, map the granted
 * page read-only, hand the payload to the qemu net layer and queue an
 * OKAY/ERROR response.  Repeats the outer loop while net_tx_response()
 * flags freshly arrived work in netdev->tx_work.
 */
static void net_tx_packets(struct XenNetDev *netdev)
{
    netif_tx_request_t txreq;
    RING_IDX rc, rp;
    void *page;
    void *tmpbuf = NULL;  /* lazily allocated bounce buffer for checksum fixup */

    for (;;) {
        rc = netdev->tx_ring.req_cons;
        rp = netdev->tx_ring.sring->req_prod;
        xen_rmb(); /* Ensure we see queued requests up to 'rp'. */

        while ((rc != rp)) {
            if (RING_REQUEST_CONS_OVERFLOW(&netdev->tx_ring, rc)) {
                break;
            }
            /* copy the request out of the shared page before validating it,
             * so the frontend can't change it under us afterwards */
            memcpy(&txreq, RING_GET_REQUEST(&netdev->tx_ring, rc), sizeof(txreq));
            netdev->tx_ring.req_cons = ++rc;

#if 1
            /* should not happen in theory, we don't announce the *
             * feature-{sg,gso,whatelse} flags in xenstore (yet?) */
            if (txreq.flags & NETTXF_extra_info) {
                xen_be_printf(&netdev->xendev, 0, "FIXME: extra info flag\n");
                net_tx_error(netdev, &txreq, rc);
                continue;
            }
            if (txreq.flags & NETTXF_more_data) {
                xen_be_printf(&netdev->xendev, 0, "FIXME: more data flag\n");
                net_tx_error(netdev, &txreq, rc);
                continue;
            }
#endif

            /* 14 bytes == minimal ethernet header */
            if (txreq.size < 14) {
                xen_be_printf(&netdev->xendev, 0, "bad packet size: %d\n", txreq.size);
                net_tx_error(netdev, &txreq, rc);
                continue;
            }

            /* single grant ref -> the payload must fit in one page */
            if ((txreq.offset + txreq.size) > XC_PAGE_SIZE) {
                xen_be_printf(&netdev->xendev, 0, "error: page crossing\n");
                net_tx_error(netdev, &txreq, rc);
                continue;
            }

            xen_be_printf(&netdev->xendev, 3, "tx packet ref %d, off %d, len %d, flags 0x%x%s%s%s%s\n",
                          txreq.gref, txreq.offset, txreq.size, txreq.flags,
                          (txreq.flags & NETTXF_csum_blank)     ? " csum_blank"     : "",
                          (txreq.flags & NETTXF_data_validated) ? " data_validated" : "",
                          (txreq.flags & NETTXF_more_data)      ? " more_data"      : "",
                          (txreq.flags & NETTXF_extra_info)     ? " extra_info"     : "");

            page = xc_gnttab_map_grant_ref(netdev->xendev.gnttabdev,
                                           netdev->xendev.dom,
                                           txreq.gref, PROT_READ);
            if (page == NULL) {
                xen_be_printf(&netdev->xendev, 0, "error: tx gref dereference failed (%d)\n",
                              txreq.gref);
                net_tx_error(netdev, &txreq, rc);
                continue;
            }
            if (txreq.flags & NETTXF_csum_blank) {
                /* have read-only mapping -> can't fill checksum in-place */
                if (!tmpbuf) {
                    tmpbuf = g_malloc(XC_PAGE_SIZE);
                }
                memcpy(tmpbuf, page + txreq.offset, txreq.size);
                net_checksum_calculate(tmpbuf, txreq.size);
                qemu_send_packet(&netdev->nic->nc, tmpbuf, txreq.size);
            } else {
                qemu_send_packet(&netdev->nic->nc, page + txreq.offset, txreq.size);
            }
            xc_gnttab_munmap(netdev->xendev.gnttabdev, page, 1);
            net_tx_response(netdev, &txreq, NETIF_RSP_OKAY);
        }
        /* tx_work is set by net_tx_response() when requests raced in
         * after we stopped looking; loop again in that case */
        if (!netdev->tx_work) {
            break;
        }
        netdev->tx_work = 0;
    }
    g_free(tmpbuf);  /* g_free(NULL) is a no-op */
}
203 
204 /* ------------------------------------------------------------- */
205 
/*
 * Queue one rx response.  'size' becomes the status on success; a
 * negative 'st' (e.g. NETIF_RSP_ERROR) overrides it, matching the netif
 * convention where status doubles as byte count / error code.
 * Notifies the frontend when the ring macros require it.
 */
static void net_rx_response(struct XenNetDev *netdev,
                            netif_rx_request_t *req, int8_t st,
                            uint16_t offset, uint16_t size,
                            uint16_t flags)
{
    RING_IDX i = netdev->rx_ring.rsp_prod_pvt;
    netif_rx_response_t *resp;
    int notify;

    resp = RING_GET_RESPONSE(&netdev->rx_ring, i);
    resp->offset     = offset;
    resp->flags      = flags;
    resp->id         = req->id;
    resp->status     = (int16_t)size;
    if (st < 0) {
        resp->status = (int16_t)st;
    }

    xen_be_printf(&netdev->xendev, 3, "rx response: idx %d, status %d, flags 0x%x\n",
                  i, resp->status, resp->flags);

    netdev->rx_ring.rsp_prod_pvt = ++i;
    RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netdev->rx_ring, notify);
    if (notify) {
        xen_be_send_notify(&netdev->xendev);
    }
}
233 
#define NET_IP_ALIGN 2  /* rx payload skew so the IP header after the 14-byte eth header is 4-byte aligned */
235 
236 static int net_rx_ok(NetClientState *nc)
237 {
238     struct XenNetDev *netdev = DO_UPCAST(NICState, nc, nc)->opaque;
239     RING_IDX rc, rp;
240 
241     if (netdev->xendev.be_state != XenbusStateConnected) {
242         return 0;
243     }
244 
245     rc = netdev->rx_ring.req_cons;
246     rp = netdev->rx_ring.sring->req_prod;
247     xen_rmb();
248 
249     if (rc == rp || RING_REQUEST_CONS_OVERFLOW(&netdev->rx_ring, rc)) {
250         xen_be_printf(&netdev->xendev, 2, "%s: no rx buffers (%d/%d)\n",
251                       __FUNCTION__, rc, rp);
252         return 0;
253     }
254     return 1;
255 }
256 
257 static ssize_t net_rx_packet(NetClientState *nc, const uint8_t *buf, size_t size)
258 {
259     struct XenNetDev *netdev = DO_UPCAST(NICState, nc, nc)->opaque;
260     netif_rx_request_t rxreq;
261     RING_IDX rc, rp;
262     void *page;
263 
264     if (netdev->xendev.be_state != XenbusStateConnected) {
265         return -1;
266     }
267 
268     rc = netdev->rx_ring.req_cons;
269     rp = netdev->rx_ring.sring->req_prod;
270     xen_rmb(); /* Ensure we see queued requests up to 'rp'. */
271 
272     if (rc == rp || RING_REQUEST_CONS_OVERFLOW(&netdev->rx_ring, rc)) {
273         xen_be_printf(&netdev->xendev, 2, "no buffer, drop packet\n");
274         return -1;
275     }
276     if (size > XC_PAGE_SIZE - NET_IP_ALIGN) {
277         xen_be_printf(&netdev->xendev, 0, "packet too big (%lu > %ld)",
278                       (unsigned long)size, XC_PAGE_SIZE - NET_IP_ALIGN);
279         return -1;
280     }
281 
282     memcpy(&rxreq, RING_GET_REQUEST(&netdev->rx_ring, rc), sizeof(rxreq));
283     netdev->rx_ring.req_cons = ++rc;
284 
285     page = xc_gnttab_map_grant_ref(netdev->xendev.gnttabdev,
286                                    netdev->xendev.dom,
287                                    rxreq.gref, PROT_WRITE);
288     if (page == NULL) {
289         xen_be_printf(&netdev->xendev, 0, "error: rx gref dereference failed (%d)\n",
290                       rxreq.gref);
291         net_rx_response(netdev, &rxreq, NETIF_RSP_ERROR, 0, 0, 0);
292         return -1;
293     }
294     memcpy(page + NET_IP_ALIGN, buf, size);
295     xc_gnttab_munmap(netdev->xendev.gnttabdev, page, 1);
296     net_rx_response(netdev, &rxreq, NETIF_RSP_OKAY, NET_IP_ALIGN, size, 0);
297 
298     return size;
299 }
300 
301 /* ------------------------------------------------------------- */
302 
/* qemu net layer callbacks for the xen NIC client */
static NetClientInfo net_xen_info = {
    .type = NET_CLIENT_OPTIONS_KIND_NIC,
    .size = sizeof(NICState),
    .can_receive = net_rx_ok,     /* frontend has an rx buffer queued? */
    .receive = net_rx_packet,     /* copy packet into granted rx page */
};
309 
/*
 * XenDevOps.init callback: read the MAC address from the backend
 * xenstore directory, create the qemu NIC and advertise our features
 * (rx-copy only, no rx-flip) to the frontend.
 * Returns 0 on success, -1 when the MAC is missing or unparsable.
 */
static int net_init(struct XenDevice *xendev)
{
    struct XenNetDev *netdev = container_of(xendev, struct XenNetDev, xendev);

    /* read xenstore entries */
    if (netdev->mac == NULL) {
        netdev->mac = xenstore_read_be_str(&netdev->xendev, "mac");
    }

    /* do we have all we need? */
    if (netdev->mac == NULL) {
        return -1;
    }

    if (net_parse_macaddr(netdev->conf.macaddr.a, netdev->mac) < 0) {
        return -1;
    }

    netdev->conf.peer = NULL;

    netdev->nic = qemu_new_nic(&net_xen_info, &netdev->conf,
                               "xen", NULL, netdev);

    snprintf(netdev->nic->nc.info_str, sizeof(netdev->nic->nc.info_str),
             "nic: xenbus vif macaddr=%s", netdev->mac);

    /* fill info */
    xenstore_write_be_int(&netdev->xendev, "feature-rx-copy", 1);
    xenstore_write_be_int(&netdev->xendev, "feature-rx-flip", 0);

    return 0;
}
342 
343 static int net_connect(struct XenDevice *xendev)
344 {
345     struct XenNetDev *netdev = container_of(xendev, struct XenNetDev, xendev);
346     int rx_copy;
347 
348     if (xenstore_read_fe_int(&netdev->xendev, "tx-ring-ref",
349                              &netdev->tx_ring_ref) == -1) {
350         return -1;
351     }
352     if (xenstore_read_fe_int(&netdev->xendev, "rx-ring-ref",
353                              &netdev->rx_ring_ref) == -1) {
354         return 1;
355     }
356     if (xenstore_read_fe_int(&netdev->xendev, "event-channel",
357                              &netdev->xendev.remote_port) == -1) {
358         return -1;
359     }
360 
361     if (xenstore_read_fe_int(&netdev->xendev, "request-rx-copy", &rx_copy) == -1) {
362         rx_copy = 0;
363     }
364     if (rx_copy == 0) {
365         xen_be_printf(&netdev->xendev, 0, "frontend doesn't support rx-copy.\n");
366         return -1;
367     }
368 
369     netdev->txs = xc_gnttab_map_grant_ref(netdev->xendev.gnttabdev,
370                                           netdev->xendev.dom,
371                                           netdev->tx_ring_ref,
372                                           PROT_READ | PROT_WRITE);
373     netdev->rxs = xc_gnttab_map_grant_ref(netdev->xendev.gnttabdev,
374                                           netdev->xendev.dom,
375                                           netdev->rx_ring_ref,
376                                           PROT_READ | PROT_WRITE);
377     if (!netdev->txs || !netdev->rxs) {
378         return -1;
379     }
380     BACK_RING_INIT(&netdev->tx_ring, netdev->txs, XC_PAGE_SIZE);
381     BACK_RING_INIT(&netdev->rx_ring, netdev->rxs, XC_PAGE_SIZE);
382 
383     xen_be_bind_evtchn(&netdev->xendev);
384 
385     xen_be_printf(&netdev->xendev, 1, "ok: tx-ring-ref %d, rx-ring-ref %d, "
386                   "remote port %d, local port %d\n",
387                   netdev->tx_ring_ref, netdev->rx_ring_ref,
388                   netdev->xendev.remote_port, netdev->xendev.local_port);
389 
390     net_tx_packets(netdev);
391     return 0;
392 }
393 
394 static void net_disconnect(struct XenDevice *xendev)
395 {
396     struct XenNetDev *netdev = container_of(xendev, struct XenNetDev, xendev);
397 
398     xen_be_unbind_evtchn(&netdev->xendev);
399 
400     if (netdev->txs) {
401         xc_gnttab_munmap(netdev->xendev.gnttabdev, netdev->txs, 1);
402         netdev->txs = NULL;
403     }
404     if (netdev->rxs) {
405         xc_gnttab_munmap(netdev->xendev.gnttabdev, netdev->rxs, 1);
406         netdev->rxs = NULL;
407     }
408     if (netdev->nic) {
409         qemu_del_net_client(&netdev->nic->nc);
410         netdev->nic = NULL;
411     }
412 }
413 
414 static void net_event(struct XenDevice *xendev)
415 {
416     struct XenNetDev *netdev = container_of(xendev, struct XenNetDev, xendev);
417     net_tx_packets(netdev);
418 }
419 
420 static int net_free(struct XenDevice *xendev)
421 {
422     struct XenNetDev *netdev = container_of(xendev, struct XenNetDev, xendev);
423 
424     g_free(netdev->mac);
425     return 0;
426 }
427 
428 /* ------------------------------------------------------------- */
429 
/* backend ops table registered with the generic xen backend core */
struct XenDevOps xen_netdev_ops = {
    .size       = sizeof(struct XenNetDev),
    .flags      = DEVOPS_FLAG_NEED_GNTDEV,  /* we map grants -> need a gnttab handle */
    .init       = net_init,
    .initialise    = net_connect,
    .event      = net_event,
    .disconnect = net_disconnect,
    .free       = net_free,
};
439