/*
 * QEMU Xen emulation: Shared/overlay pages support
 *
 * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 *
 * Authors: David Woodhouse <dwmw2@infradead.org>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"

#include "qemu/host-utils.h"
#include "qemu/module.h"
#include "qemu/main-loop.h"
#include "qemu/cutils.h"
#include "qemu/error-report.h"
#include "qapi/error.h"
#include "qom/object.h"
#include "migration/vmstate.h"

#include "hw/sysbus.h"
#include "hw/xen/xen.h"
#include "hw/xen/xen_backend_ops.h"
#include "xen_overlay.h"
#include "xen_evtchn.h"
#include "xen_primary_console.h"
#include "xen_xenstore.h"

#include "system/kvm.h"
#include "system/kvm_xen.h"

#include "trace.h"

#include "xenstore_impl.h"

#include "hw/xen/interface/io/xs_wire.h"
#include "hw/xen/interface/event_channel.h"
#include "hw/xen/interface/grant_table.h"

#define TYPE_XEN_XENSTORE "xen-xenstore"
OBJECT_DECLARE_SIMPLE_TYPE(XenXenstoreState, XEN_XENSTORE)

#define ENTRIES_PER_FRAME_V1 (XEN_PAGE_SIZE / sizeof(grant_entry_v1_t))
#define ENTRIES_PER_FRAME_V2 (XEN_PAGE_SIZE / sizeof(grant_entry_v2_t))

#define XENSTORE_HEADER_SIZE ((unsigned int)sizeof(struct xsd_sockmsg))
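
/*
 * For reference: struct xsd_sockmsg (from xs_wire.h) is the fixed header
 * carried by every request and response, four uint32_t fields
 * { type, req_id, tx_id, len }, i.e. 16 bytes, followed by 'len' bytes
 * of payload (at most XENSTORE_PAYLOAD_MAX).
 */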

struct XenXenstoreState {
    /*< private >*/
    SysBusDevice busdev;
    /*< public >*/

    XenstoreImplState *impl;
    GList *watch_events; /* for the guest */

    MemoryRegion xenstore_page;
    struct xenstore_domain_interface *xs;
    uint8_t req_data[XENSTORE_HEADER_SIZE + XENSTORE_PAYLOAD_MAX];
    uint8_t rsp_data[XENSTORE_HEADER_SIZE + XENSTORE_PAYLOAD_MAX];
    uint32_t req_offset;
    uint32_t rsp_offset;
    bool rsp_pending;
    bool fatal_error;

    evtchn_port_t guest_port;
    evtchn_port_t be_port;
    struct xenevtchn_handle *eh;

    uint8_t *impl_state;
    uint32_t impl_state_size;

    struct xengntdev_handle *gt;
    void *granted_xs;
};
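
/*
 * Note on the fields above: only one guest request and one response are
 * ever in flight. req_data accumulates an incoming request (req_offset
 * tracks progress), rsp_data holds the outgoing response or watch event,
 * and rsp_pending is set until the whole response has been copied into
 * the ring.
 */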

struct XenXenstoreState *xen_xenstore_singleton;

static void xen_xenstore_event(void *opaque);
static void fire_watch_cb(void *opaque, const char *path, const char *token);

static struct xenstore_backend_ops emu_xenstore_backend_ops;

static void G_GNUC_PRINTF (4, 5) relpath_printf(XenXenstoreState *s,
                                                GList *perms,
                                                const char *relpath,
                                                const char *fmt, ...)
{
    gchar *abspath;
    gchar *value;
    va_list args;
    GByteArray *data;
    int err;

    abspath = g_strdup_printf("/local/domain/%u/%s", xen_domid, relpath);
    va_start(args, fmt);
    value = g_strdup_vprintf(fmt, args);
    va_end(args);

    data = g_byte_array_new_take((void *)value, strlen(value));

    err = xs_impl_write(s->impl, DOMID_QEMU, XBT_NULL, abspath, data);
    assert(!err);

    g_byte_array_unref(data);

    err = xs_impl_set_perms(s->impl, DOMID_QEMU, XBT_NULL, abspath, perms);
    assert(!err);

    g_free(abspath);
}
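
/*
 * For example, relpath_printf(s, perms, "domid", "%u", xen_domid) writes
 * the decimal domid to /local/domain/<domid>/domid and applies the given
 * permission list, acting as QEMU (DOMID_QEMU) outside any transaction
 * (XBT_NULL).
 */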

static void xen_xenstore_realize(DeviceState *dev, Error **errp)
{
    XenXenstoreState *s = XEN_XENSTORE(dev);
    GList *perms;

    if (xen_mode != XEN_EMULATE) {
        error_setg(errp, "Xen xenstore support is for Xen emulation");
        return;
    }
    memory_region_init_ram(&s->xenstore_page, OBJECT(dev), "xen:xenstore_page",
                           XEN_PAGE_SIZE, &error_abort);
    memory_region_set_enabled(&s->xenstore_page, true);
    s->xs = memory_region_get_ram_ptr(&s->xenstore_page);
    memset(s->xs, 0, XEN_PAGE_SIZE);

    /* We can't map it this early as KVM isn't ready */
    xen_xenstore_singleton = s;

    s->eh = xen_be_evtchn_open();
    if (!s->eh) {
        error_setg(errp, "Xenstore evtchn port init failed");
        return;
    }
    aio_set_fd_handler(qemu_get_aio_context(), xen_be_evtchn_fd(s->eh),
                       xen_xenstore_event, NULL, NULL, NULL, s);

    s->impl = xs_impl_create(xen_domid);

    /* Populate the default nodes */

    /* Nodes owned by 'dom0' but readable by the guest */
    perms = g_list_append(NULL, xs_perm_as_string(XS_PERM_NONE, DOMID_QEMU));
    perms = g_list_append(perms, xs_perm_as_string(XS_PERM_READ, xen_domid));

    relpath_printf(s, perms, "", "%s", "");

    relpath_printf(s, perms, "domid", "%u", xen_domid);

    relpath_printf(s, perms, "control/platform-feature-xs_reset_watches", "%u", 1);
    relpath_printf(s, perms, "control/platform-feature-multiprocessor-suspend", "%u", 1);

    relpath_printf(s, perms, "platform/acpi", "%u", 1);
    relpath_printf(s, perms, "platform/acpi_s3", "%u", 1);
    relpath_printf(s, perms, "platform/acpi_s4", "%u", 1);
    relpath_printf(s, perms, "platform/acpi_laptop_slate", "%u", 0);

    g_list_free_full(perms, g_free);

    /* Nodes owned by the guest */
    perms = g_list_append(NULL, xs_perm_as_string(XS_PERM_NONE, xen_domid));

    relpath_printf(s, perms, "attr", "%s", "");

    relpath_printf(s, perms, "control/shutdown", "%s", "");
    relpath_printf(s, perms, "control/feature-poweroff", "%u", 1);
    relpath_printf(s, perms, "control/feature-reboot", "%u", 1);
    relpath_printf(s, perms, "control/feature-suspend", "%u", 1);
    relpath_printf(s, perms, "control/feature-s3", "%u", 1);
    relpath_printf(s, perms, "control/feature-s4", "%u", 1);

    relpath_printf(s, perms, "data", "%s", "");
    relpath_printf(s, perms, "device", "%s", "");
    relpath_printf(s, perms, "drivers", "%s", "");
    relpath_printf(s, perms, "error", "%s", "");
    relpath_printf(s, perms, "feature", "%s", "");

    g_list_free_full(perms, g_free);

    xen_xenstore_ops = &emu_xenstore_backend_ops;
}

static bool xen_xenstore_is_needed(void *opaque)
{
    return xen_mode == XEN_EMULATE;
}

static int xen_xenstore_pre_save(void *opaque)
{
    XenXenstoreState *s = opaque;
    GByteArray *save;

    if (s->eh) {
        s->guest_port = xen_be_evtchn_get_guest_port(s->eh);
    }

    g_free(s->impl_state);
    save = xs_impl_serialize(s->impl);
    s->impl_state = save->data;
    s->impl_state_size = save->len;
    g_byte_array_free(save, false);

    return 0;
}

static int xen_xenstore_post_load(void *opaque, int ver)
{
    XenXenstoreState *s = opaque;
    GByteArray *save;
    int ret;

    /*
     * As qemu/dom0, rebind to the guest's port. The Windows drivers may
     * unbind the XenStore evtchn and rebind to it, having obtained the
     * "remote" port through EVTCHNOP_status. In the case that migration
     * occurs while it's unbound, the "remote" port needs to be the same
     * as before so that the guest can find it, but should remain unbound.
     */
    if (s->guest_port) {
        int be_port = xen_be_evtchn_bind_interdomain(s->eh, xen_domid,
                                                     s->guest_port);
        if (be_port < 0) {
            return be_port;
        }
        s->be_port = be_port;
    }

    save = g_byte_array_new_take(s->impl_state, s->impl_state_size);
    s->impl_state = NULL;
    s->impl_state_size = 0;

    ret = xs_impl_deserialize(s->impl, save, xen_domid, fire_watch_cb, s);
    return ret;
}

static const VMStateDescription xen_xenstore_vmstate = {
    .name = "xen_xenstore",
    .unmigratable = 1, /* The PV back ends don't migrate yet */
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = xen_xenstore_is_needed,
    .pre_save = xen_xenstore_pre_save,
    .post_load = xen_xenstore_post_load,
    .fields = (const VMStateField[]) {
        VMSTATE_UINT8_ARRAY(req_data, XenXenstoreState,
                            sizeof_field(XenXenstoreState, req_data)),
        VMSTATE_UINT8_ARRAY(rsp_data, XenXenstoreState,
                            sizeof_field(XenXenstoreState, rsp_data)),
        VMSTATE_UINT32(req_offset, XenXenstoreState),
        VMSTATE_UINT32(rsp_offset, XenXenstoreState),
        VMSTATE_BOOL(rsp_pending, XenXenstoreState),
        VMSTATE_UINT32(guest_port, XenXenstoreState),
        VMSTATE_BOOL(fatal_error, XenXenstoreState),
        VMSTATE_UINT32(impl_state_size, XenXenstoreState),
        VMSTATE_VARRAY_UINT32_ALLOC(impl_state, XenXenstoreState,
                                    impl_state_size, 0,
                                    vmstate_info_uint8, uint8_t),
        VMSTATE_END_OF_LIST()
    }
};

static void xen_xenstore_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);

    dc->realize = xen_xenstore_realize;
    dc->vmsd = &xen_xenstore_vmstate;
}

static const TypeInfo xen_xenstore_info = {
    .name          = TYPE_XEN_XENSTORE,
    .parent        = TYPE_SYS_BUS_DEVICE,
    .instance_size = sizeof(XenXenstoreState),
    .class_init    = xen_xenstore_class_init,
};

void xen_xenstore_create(void)
{
    DeviceState *dev = sysbus_create_simple(TYPE_XEN_XENSTORE, -1, NULL);

    xen_xenstore_singleton = XEN_XENSTORE(dev);

    /*
     * Defer the init (xen_xenstore_reset()) until KVM is set up and the
     * overlay page can be mapped.
     */
}

static void xen_xenstore_register_types(void)
{
    type_register_static(&xen_xenstore_info);
}

type_init(xen_xenstore_register_types)

uint16_t xen_xenstore_get_port(void)
{
    XenXenstoreState *s = xen_xenstore_singleton;
    if (!s) {
        return 0;
    }
    return s->guest_port;
}

static bool req_pending(XenXenstoreState *s)
{
    struct xsd_sockmsg *req = (struct xsd_sockmsg *)s->req_data;

    return s->req_offset == XENSTORE_HEADER_SIZE + req->len;
}

static void reset_req(XenXenstoreState *s)
{
    memset(s->req_data, 0, sizeof(s->req_data));
    s->req_offset = 0;
}

static void reset_rsp(XenXenstoreState *s)
{
    s->rsp_pending = false;

    memset(s->rsp_data, 0, sizeof(s->rsp_data));
    s->rsp_offset = 0;
}

static void xs_error(XenXenstoreState *s, unsigned int id,
                     xs_transaction_t tx_id, int errnum)
{
    struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
    const char *errstr = NULL;

    for (unsigned int i = 0; i < ARRAY_SIZE(xsd_errors); i++) {
        const struct xsd_errors *xsd_error = &xsd_errors[i];

        if (xsd_error->errnum == errnum) {
            errstr = xsd_error->errstring;
            break;
        }
    }
    assert(errstr);

    trace_xenstore_error(id, tx_id, errstr);

    rsp->type = XS_ERROR;
    rsp->req_id = id;
    rsp->tx_id = tx_id;
    rsp->len = (uint32_t)strlen(errstr) + 1;

    memcpy(&rsp[1], errstr, rsp->len);
}

static void xs_ok(XenXenstoreState *s, unsigned int type, unsigned int req_id,
                  xs_transaction_t tx_id)
{
    struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
    const char *okstr = "OK";

    rsp->type = type;
    rsp->req_id = req_id;
    rsp->tx_id = tx_id;
    rsp->len = (uint32_t)strlen(okstr) + 1;

    memcpy(&rsp[1], okstr, rsp->len);
}

/*
 * The correct request and response formats are documented in xen.git:
 * docs/misc/xenstore.txt. A summary is given below for convenience.
 * The '|' symbol represents a NUL character.
 *
 * ---------- Database read, write and permissions operations ----------
 *
 * READ                    <path>|                 <value|>
 * WRITE                   <path>|<value|>
 *         Store and read the octet string <value> at <path>.
 *         WRITE creates any missing parent paths, with empty values.
 *
 * MKDIR                   <path>|
 *         Ensures that the <path> exists, if necessary by creating
 *         it and any missing parents with empty values.  If <path>
 *         or any parent already exists, its value is left unchanged.
 *
 * RM                      <path>|
 *         Ensures that the <path> does not exist, by deleting
 *         it and all of its children.  It is not an error if <path> does
 *         not exist, but it _is_ an error if <path>'s immediate parent
 *         does not exist either.
 *
 * DIRECTORY               <path>|                 <child-leaf-name>|*
 *         Gives a list of the immediate children of <path>, as only the
 *         leafnames.  The resulting children are each named
 *         <path>/<child-leaf-name>.
 *
 * DIRECTORY_PART          <path>|<offset>         <gencnt>|<child-leaf-name>|*
 *         Same as DIRECTORY, but to be used for children lists longer than
 *         XENSTORE_PAYLOAD_MAX. Inputs are <path> and the byte offset into
 *         the list of children to return. Return values are the generation
 *         count <gencnt> of the node (to be used to ensure the node hasn't
 *         changed between two reads: <gencnt> being the same for multiple
 *         reads guarantees the node hasn't changed) and the list of children
 *         starting at the specified <offset> of the complete list.
 *
 * GET_PERMS               <path>|                 <perm-as-string>|+
 * SET_PERMS               <path>|<perm-as-string>|+?
 *         <perm-as-string> is one of the following
 *                 w<domid>        write only
 *                 r<domid>        read only
 *                 b<domid>        both read and write
 *                 n<domid>        no access
 *         See https://wiki.xen.org/wiki/XenBus section
 *         `Permissions' for details of the permissions system.
 *         It is possible to set permissions for the special watch paths
 *         "@introduceDomain" and "@releaseDomain" to enable receiving those
 *         watches in unprivileged domains.
 *
 * ---------- Watches ----------
 *
 * WATCH                   <wpath>|<token>|?
 *         Adds a watch.
 *
 *         When a <path> is modified (including path creation, removal,
 *         contents change or permissions change) this generates an event
 *         on the changed <path>.  Changes made in transactions cause an
 *         event only if and when committed.  Each occurring event is
 *         matched against all the watches currently set up, and each
 *         matching watch results in a WATCH_EVENT message (see below).
 *
 *         The event's path matches the watch's <wpath> if it is a child
 *         of <wpath>.
 *
 *         <wpath> can be a <path> to watch or @<wspecial>.  In the
 *         latter case <wspecial> may have any syntax but it matches
 *         (according to the rules above) only the following special
 *         events which are invented by xenstored:
 *             @introduceDomain    occurs on INTRODUCE
 *             @releaseDomain      occurs on any domain crash or
 *                                 shutdown, and also on RELEASE
 *                                 and domain destruction
 *         <wspecial> events are sent to privileged callers or explicitly
 *         via SET_PERMS enabled domains only.
 *
 *         When a watch is first set up it is triggered once straight
 *         away, with <path> equal to <wpath>.  Watches may be triggered
 *         spuriously.  The tx_id in a WATCH request is ignored.
 *
 *         Watches are supposed to be restricted by the permissions
 *         system but in practice the implementation is imperfect.
 *         Applications should not rely on being sent a notification for
 *         paths that they cannot read; however, an application may rely
 *         on being sent a watch when a path which it _is_ able to read
 *         is deleted even if that leaves only a nonexistent unreadable
 *         parent.  A notification may be omitted if a node's permissions
 *         are changed so as to make it unreadable, in which case future
 *         notifications may be suppressed (and if the node is later made
 *         readable, some notifications may have been lost).
 *
 * WATCH_EVENT                                     <epath>|<token>|
 *         Unsolicited `reply' generated for matching modification events
 *         as described above.  req_id and tx_id are both 0.
 *
 *         <epath> is the event's path, ie the actual path that was
 *         modified; however if the event was the recursive removal of a
 *         parent of <wpath>, <epath> is just
 *         <wpath> (rather than the actual path which was removed).  So
 *         <epath> is a child of <wpath>, regardless.
 *
 *         Iff <wpath> for the watch was specified as a relative pathname,
 *         the <epath> path will also be relative (with the same base,
 *         obviously).
 *
 * UNWATCH                 <wpath>|<token>|?
 *
 * RESET_WATCHES           |
 *         Reset all watches and transactions of the caller.
 *
 * ---------- Transactions ----------
 *
 * TRANSACTION_START       |                       <transid>|
 *         <transid> is an opaque uint32_t allocated by xenstored
 *         represented as unsigned decimal.  After this, the transaction
 *         may be referenced by using <transid> (as 32-bit binary) in the
 *         tx_id request header field.  When a transaction is started the
 *         whole db is copied; reads and writes happen on the copy.
 *         It is not legal to send a non-0 tx_id in TRANSACTION_START.
 *
 * TRANSACTION_END         T|
 * TRANSACTION_END         F|
 *         tx_id must refer to an existing transaction.  After this
 *         request the tx_id is no longer valid and may be reused by
 *         xenstore.  If F, the transaction is discarded.  If T,
 *         it is committed: if there were any other intervening writes
 *         then our END gets EAGAIN.
 *
 *         The plan is that in the future only intervening `conflicting'
 *         writes cause EAGAIN, meaning only writes or other commits
 *         which changed paths which were read or written in the
 *         transaction at hand.
 *
 */

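/*
 * An illustrative exchange (hypothetical values, not from the spec): a
 * READ of "/local/domain/1/domid" is the 16-byte header
 * { XS_READ, req_id, 0, 22 } followed by the 22-byte payload
 * "/local/domain/1/domid\0". The reply, as generated by xs_read() below,
 * is { XS_READ, req_id, 0, 1 } followed by the raw stored value "1",
 * with no added NUL termination.
 */
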
static void xs_read(XenXenstoreState *s, unsigned int req_id,
                    xs_transaction_t tx_id, uint8_t *req_data, unsigned int len)
{
    const char *path = (const char *)req_data;
    struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
    uint8_t *rsp_data = (uint8_t *)&rsp[1];
    g_autoptr(GByteArray) data = g_byte_array_new();
    int err;

    if (len == 0 || req_data[len - 1] != '\0') {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    trace_xenstore_read(tx_id, path);
    err = xs_impl_read(s->impl, xen_domid, tx_id, path, data);
    if (err) {
        xs_error(s, req_id, tx_id, err);
        return;
    }

    rsp->type = XS_READ;
    rsp->req_id = req_id;
    rsp->tx_id = tx_id;
    rsp->len = 0;

    len = data->len;
    if (len > XENSTORE_PAYLOAD_MAX) {
        xs_error(s, req_id, tx_id, E2BIG);
        return;
    }

    if (!len) {
        return;
    }

    memcpy(&rsp_data[rsp->len], data->data, len);
    rsp->len += len;
}

static void xs_write(XenXenstoreState *s, unsigned int req_id,
                     xs_transaction_t tx_id, uint8_t *req_data,
                     unsigned int len)
{
    g_autoptr(GByteArray) data = g_byte_array_new();
    const char *path;
    int err;

    if (len == 0) {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    path = (const char *)req_data;

    while (len--) {
        if (*req_data++ == '\0') {
            break;
        }
        if (len == 0) {
            xs_error(s, req_id, tx_id, EINVAL);
            return;
        }
    }

    g_byte_array_append(data, req_data, len);

    trace_xenstore_write(tx_id, path);
    err = xs_impl_write(s->impl, xen_domid, tx_id, path, data);
    if (err) {
        xs_error(s, req_id, tx_id, err);
        return;
    }

    xs_ok(s, XS_WRITE, req_id, tx_id);
}

static void xs_mkdir(XenXenstoreState *s, unsigned int req_id,
                     xs_transaction_t tx_id, uint8_t *req_data,
                     unsigned int len)
{
    g_autoptr(GByteArray) data = g_byte_array_new();
    const char *path;
    int err;

    if (len == 0 || req_data[len - 1] != '\0') {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    path = (const char *)req_data;

    trace_xenstore_mkdir(tx_id, path);
    err = xs_impl_read(s->impl, xen_domid, tx_id, path, data);
    if (err == ENOENT) {
        err = xs_impl_write(s->impl, xen_domid, tx_id, path, data);
    }

    if (err) {
        xs_error(s, req_id, tx_id, err);
        return;
    }

    xs_ok(s, XS_MKDIR, req_id, tx_id);
}

static void xs_append_strings(XenXenstoreState *s, struct xsd_sockmsg *rsp,
                              GList *strings, unsigned int start, bool truncate)
{
    uint8_t *rsp_data = (uint8_t *)&rsp[1];
    GList *l;

    for (l = strings; l; l = l->next) {
        size_t len = strlen(l->data) + 1; /* Including the NUL termination */
        char *str = l->data;

        if (rsp->len + len > XENSTORE_PAYLOAD_MAX) {
            if (truncate) {
                len = XENSTORE_PAYLOAD_MAX - rsp->len;
                if (!len) {
                    return;
                }
            } else {
                xs_error(s, rsp->req_id, rsp->tx_id, E2BIG);
                return;
            }
        }

        if (start) {
            if (start >= len) {
                start -= len;
                continue;
            }

            str += start;
            len -= start;
            start = 0;
        }

        memcpy(&rsp_data[rsp->len], str, len);
        rsp->len += len;
    }
    /* XS_DIRECTORY_PART wants an extra NUL to indicate the end */
    if (truncate && rsp->len < XENSTORE_PAYLOAD_MAX) {
        rsp_data[rsp->len++] = '\0';
    }
}
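
/*
 * For example, a DIRECTORY reply for a node with children "port" and
 * "ring-ref" carries the payload "port\0ring-ref\0". With truncate set
 * (the XS_DIRECTORY_PART case), the list is additionally terminated by
 * an empty leaf name, i.e. one further NUL.
 */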

static void xs_directory(XenXenstoreState *s, unsigned int req_id,
                         xs_transaction_t tx_id, uint8_t *req_data,
                         unsigned int len)
{
    struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
    GList *items = NULL;
    const char *path;
    int err;

    if (len == 0 || req_data[len - 1] != '\0') {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    path = (const char *)req_data;

    trace_xenstore_directory(tx_id, path);
    err = xs_impl_directory(s->impl, xen_domid, tx_id, path, NULL, &items);
    if (err != 0) {
        xs_error(s, req_id, tx_id, err);
        return;
    }

    rsp->type = XS_DIRECTORY;
    rsp->req_id = req_id;
    rsp->tx_id = tx_id;
    rsp->len = 0;

    xs_append_strings(s, rsp, items, 0, false);

    g_list_free_full(items, g_free);
}

static void xs_directory_part(XenXenstoreState *s, unsigned int req_id,
                              xs_transaction_t tx_id, uint8_t *req_data,
                              unsigned int len)
{
    const char *offset_str, *path = (const char *)req_data;
    struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
    char *rsp_data = (char *)&rsp[1];
    uint64_t gencnt = 0;
    unsigned int offset;
    GList *items = NULL;
    int err;

    if (len == 0) {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    while (len--) {
        if (*req_data++ == '\0') {
            break;
        }
        if (len == 0) {
            xs_error(s, req_id, tx_id, EINVAL);
            return;
        }
    }

    offset_str = (const char *)req_data;
    while (len--) {
        if (*req_data++ == '\0') {
            break;
        }
        if (len == 0) {
            xs_error(s, req_id, tx_id, EINVAL);
            return;
        }
    }

    if (len) {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    if (qemu_strtoui(offset_str, NULL, 10, &offset) < 0) {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    trace_xenstore_directory_part(tx_id, path, offset);
    err = xs_impl_directory(s->impl, xen_domid, tx_id, path, &gencnt, &items);
    if (err != 0) {
        xs_error(s, req_id, tx_id, err);
        return;
    }

    rsp->type = XS_DIRECTORY_PART;
    rsp->req_id = req_id;
    rsp->tx_id = tx_id;
    rsp->len = snprintf(rsp_data, XENSTORE_PAYLOAD_MAX, "%" PRIu64, gencnt) + 1;

    xs_append_strings(s, rsp, items, offset, true);

    g_list_free_full(items, g_free);
}
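
/*
 * So a DIRECTORY_PART reply carries "<gencnt>\0<child>\0<child>\0...\0":
 * the generation count as a decimal string, then the children starting
 * at the requested byte offset into the full list, then the extra NUL
 * appended by xs_append_strings() to mark the end.
 */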

static void xs_transaction_start(XenXenstoreState *s, unsigned int req_id,
                                 xs_transaction_t tx_id, uint8_t *req_data,
                                 unsigned int len)
{
    struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
    char *rsp_data = (char *)&rsp[1];
    int err;

    if (len != 1 || req_data[0] != '\0') {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    rsp->type = XS_TRANSACTION_START;
    rsp->req_id = req_id;
    rsp->tx_id = tx_id;
    rsp->len = 0;

    err = xs_impl_transaction_start(s->impl, xen_domid, &tx_id);
    if (err) {
        xs_error(s, req_id, tx_id, err);
        return;
    }

    trace_xenstore_transaction_start(tx_id);

    rsp->len = snprintf(rsp_data, XENSTORE_PAYLOAD_MAX, "%u", tx_id);
    assert(rsp->len < XENSTORE_PAYLOAD_MAX);
    rsp->len++;
}

static void xs_transaction_end(XenXenstoreState *s, unsigned int req_id,
                               xs_transaction_t tx_id, uint8_t *req_data,
                               unsigned int len)
{
    bool commit;
    int err;

    if (len != 2 || req_data[1] != '\0') {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    switch (req_data[0]) {
    case 'T':
        commit = true;
        break;
    case 'F':
        commit = false;
        break;
    default:
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    trace_xenstore_transaction_end(tx_id, commit);
    err = xs_impl_transaction_end(s->impl, xen_domid, tx_id, commit);
    if (err) {
        xs_error(s, req_id, tx_id, err);
        return;
    }

    xs_ok(s, XS_TRANSACTION_END, req_id, tx_id);
}

static void xs_rm(XenXenstoreState *s, unsigned int req_id,
                  xs_transaction_t tx_id, uint8_t *req_data, unsigned int len)
{
    const char *path = (const char *)req_data;
    int err;

    if (len == 0 || req_data[len - 1] != '\0') {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    trace_xenstore_rm(tx_id, path);
    err = xs_impl_rm(s->impl, xen_domid, tx_id, path);
    if (err) {
        xs_error(s, req_id, tx_id, err);
        return;
    }

    xs_ok(s, XS_RM, req_id, tx_id);
}

static void xs_get_perms(XenXenstoreState *s, unsigned int req_id,
                         xs_transaction_t tx_id, uint8_t *req_data,
                         unsigned int len)
{
    const char *path = (const char *)req_data;
    struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
    GList *perms = NULL;
    int err;

    if (len == 0 || req_data[len - 1] != '\0') {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    trace_xenstore_get_perms(tx_id, path);
    err = xs_impl_get_perms(s->impl, xen_domid, tx_id, path, &perms);
    if (err) {
        xs_error(s, req_id, tx_id, err);
        return;
    }

    rsp->type = XS_GET_PERMS;
    rsp->req_id = req_id;
    rsp->tx_id = tx_id;
    rsp->len = 0;

    xs_append_strings(s, rsp, perms, 0, false);

    g_list_free_full(perms, g_free);
}

static void xs_set_perms(XenXenstoreState *s, unsigned int req_id,
                         xs_transaction_t tx_id, uint8_t *req_data,
                         unsigned int len)
{
    const char *path = (const char *)req_data;
    uint8_t *perm;
    GList *perms = NULL;
    int err;

    if (len == 0) {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    while (len--) {
        if (*req_data++ == '\0') {
            break;
        }
        if (len == 0) {
            xs_error(s, req_id, tx_id, EINVAL);
            return;
        }
    }

    perm = req_data;
    while (len--) {
        if (*req_data++ == '\0') {
            perms = g_list_append(perms, perm);
            perm = req_data;
        }
    }

    /*
     * Note that there may be trailing garbage at the end of the buffer.
     * This is explicitly permitted by the '?' at the end of the definition:
     *
     *    SET_PERMS         <path>|<perm-as-string>|+?
     */

    trace_xenstore_set_perms(tx_id, path);
    err = xs_impl_set_perms(s->impl, xen_domid, tx_id, path, perms);
    g_list_free(perms);
    if (err) {
        xs_error(s, req_id, tx_id, err);
        return;
    }

    xs_ok(s, XS_SET_PERMS, req_id, tx_id);
}

static void xs_watch(XenXenstoreState *s, unsigned int req_id,
                     xs_transaction_t tx_id, uint8_t *req_data,
                     unsigned int len)
{
    const char *token, *path = (const char *)req_data;
    int err;

    if (len == 0) {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    while (len--) {
        if (*req_data++ == '\0') {
            break;
        }
        if (len == 0) {
            xs_error(s, req_id, tx_id, EINVAL);
            return;
        }
    }

    token = (const char *)req_data;
    while (len--) {
        if (*req_data++ == '\0') {
            break;
        }
        if (len == 0) {
            xs_error(s, req_id, tx_id, EINVAL);
            return;
        }
    }

    /*
     * Note that there may be trailing garbage at the end of the buffer.
     * This is explicitly permitted by the '?' at the end of the definition:
     *
     *    WATCH             <wpath>|<token>|?
     */

    trace_xenstore_watch(path, token);
    err = xs_impl_watch(s->impl, xen_domid, path, token, fire_watch_cb, s);
    if (err) {
        xs_error(s, req_id, tx_id, err);
        return;
    }

    xs_ok(s, XS_WATCH, req_id, tx_id);
}

static void xs_unwatch(XenXenstoreState *s, unsigned int req_id,
                       xs_transaction_t tx_id, uint8_t *req_data,
                       unsigned int len)
{
    const char *token, *path = (const char *)req_data;
    int err;

    if (len == 0) {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    while (len--) {
        if (*req_data++ == '\0') {
            break;
        }
        if (len == 0) {
            xs_error(s, req_id, tx_id, EINVAL);
            return;
        }
    }

    token = (const char *)req_data;
    while (len--) {
        if (*req_data++ == '\0') {
            break;
        }
        if (len == 0) {
            xs_error(s, req_id, tx_id, EINVAL);
            return;
        }
    }

    trace_xenstore_unwatch(path, token);
    err = xs_impl_unwatch(s->impl, xen_domid, path, token, fire_watch_cb, s);
    if (err) {
        xs_error(s, req_id, tx_id, err);
        return;
    }

    xs_ok(s, XS_UNWATCH, req_id, tx_id);
}

static void xs_reset_watches(XenXenstoreState *s, unsigned int req_id,
                             xs_transaction_t tx_id, uint8_t *req_data,
                             unsigned int len)
{
    if (len == 0 || req_data[len - 1] != '\0') {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    trace_xenstore_reset_watches();
    xs_impl_reset_watches(s->impl, xen_domid);

    xs_ok(s, XS_RESET_WATCHES, req_id, tx_id);
}

static void xs_priv(XenXenstoreState *s, unsigned int req_id,
                    xs_transaction_t tx_id, uint8_t *data,
                    unsigned int len)
{
    xs_error(s, req_id, tx_id, EACCES);
}

static void xs_unimpl(XenXenstoreState *s, unsigned int req_id,
                      xs_transaction_t tx_id, uint8_t *data,
                      unsigned int len)
{
    xs_error(s, req_id, tx_id, ENOSYS);
}

typedef void (*xs_impl)(XenXenstoreState *s, unsigned int req_id,
                        xs_transaction_t tx_id, uint8_t *data,
                        unsigned int len);

struct xsd_req {
    const char *name;
    xs_impl fn;
};
#define XSD_REQ(_type, _fn)                           \
    [_type] = { .name = #_type, .fn = _fn }

struct xsd_req xsd_reqs[] = {
    XSD_REQ(XS_READ, xs_read),
    XSD_REQ(XS_WRITE, xs_write),
    XSD_REQ(XS_MKDIR, xs_mkdir),
    XSD_REQ(XS_DIRECTORY, xs_directory),
    XSD_REQ(XS_DIRECTORY_PART, xs_directory_part),
    XSD_REQ(XS_TRANSACTION_START, xs_transaction_start),
    XSD_REQ(XS_TRANSACTION_END, xs_transaction_end),
    XSD_REQ(XS_RM, xs_rm),
    XSD_REQ(XS_GET_PERMS, xs_get_perms),
    XSD_REQ(XS_SET_PERMS, xs_set_perms),
    XSD_REQ(XS_WATCH, xs_watch),
    XSD_REQ(XS_UNWATCH, xs_unwatch),
    XSD_REQ(XS_CONTROL, xs_priv),
    XSD_REQ(XS_INTRODUCE, xs_priv),
    XSD_REQ(XS_RELEASE, xs_priv),
    XSD_REQ(XS_IS_DOMAIN_INTRODUCED, xs_priv),
    XSD_REQ(XS_RESUME, xs_priv),
    XSD_REQ(XS_SET_TARGET, xs_priv),
    XSD_REQ(XS_RESET_WATCHES, xs_reset_watches),
};
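
/*
 * The designated initializers above are indexed by request type, so any
 * XS_* value without an entry (or beyond the end of the array) leaves a
 * NULL .fn, which process_req() below maps to xs_unimpl() and hence an
 * ENOSYS error reply.
 */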

static void process_req(XenXenstoreState *s)
{
    struct xsd_sockmsg *req = (struct xsd_sockmsg *)s->req_data;
    xs_impl handler = NULL;

    assert(req_pending(s));
    assert(!s->rsp_pending);

    if (req->type < ARRAY_SIZE(xsd_reqs)) {
        handler = xsd_reqs[req->type].fn;
    }
    if (!handler) {
        handler = &xs_unimpl;
    }

    handler(s, req->req_id, req->tx_id, (uint8_t *)&req[1], req->len);

    s->rsp_pending = true;
    reset_req(s);
}

static unsigned int copy_from_ring(XenXenstoreState *s, uint8_t *ptr,
                                   unsigned int len)
{
    if (!len) {
        return 0;
    }

    XENSTORE_RING_IDX prod = qatomic_read(&s->xs->req_prod);
    XENSTORE_RING_IDX cons = qatomic_read(&s->xs->req_cons);
    unsigned int copied = 0;

    /* Ensure the ring contents don't cross the req_prod access. */
    smp_rmb();

    while (len) {
        unsigned int avail = prod - cons;
        unsigned int offset = MASK_XENSTORE_IDX(cons);
        unsigned int copylen = avail;

        if (avail > XENSTORE_RING_SIZE) {
            error_report("XenStore ring handling error");
            s->fatal_error = true;
            break;
        } else if (avail == 0) {
            break;
        }

        if (copylen > len) {
            copylen = len;
        }
        if (copylen > XENSTORE_RING_SIZE - offset) {
            copylen = XENSTORE_RING_SIZE - offset;
        }

        memcpy(ptr, &s->xs->req[offset], copylen);
        copied += copylen;

        ptr += copylen;
        len -= copylen;

        cons += copylen;
    }

    /*
     * Not sure this ever mattered except on Alpha, but this barrier
     * is to ensure that the update to req_cons is globally visible
     * only after we have consumed all the data from the ring, and we
     * don't end up seeing data written to the ring *after* the other
     * end sees the update and writes more to the ring. Xen's own
     * xenstored has the same barrier here (although with no comment
     * at all, obviously, because it's Xen code).
     */
    smp_mb();

    qatomic_set(&s->xs->req_cons, cons);

    return copied;
}
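
/*
 * Note that prod and cons are free-running uint32_t indices: avail =
 * prod - cons is correct even across wraparound, and MASK_XENSTORE_IDX()
 * (from xs_wire.h) reduces an index to an offset within the
 * XENSTORE_RING_SIZE byte buffer.
 */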

static unsigned int copy_to_ring(XenXenstoreState *s, uint8_t *ptr,
                                 unsigned int len)
{
    if (!len) {
        return 0;
    }

    XENSTORE_RING_IDX cons = qatomic_read(&s->xs->rsp_cons);
    XENSTORE_RING_IDX prod = qatomic_read(&s->xs->rsp_prod);
    unsigned int copied = 0;

    /*
     * This matches the barrier in copy_to_ring() (or the guest's
     * equivalent) between writing the data to the ring and updating
     * rsp_prod. It protects against the pathological case (which
     * again I think never happened except on Alpha) where our
     * subsequent writes to the ring could *cross* the read of
     * rsp_cons and the guest could see the new data when it was
     * intending to read the old.
     */
    smp_mb();

    while (len) {
        unsigned int avail = cons + XENSTORE_RING_SIZE - prod;
        unsigned int offset = MASK_XENSTORE_IDX(prod);
        unsigned int copylen = len;

        if (avail > XENSTORE_RING_SIZE) {
            error_report("XenStore ring handling error");
            s->fatal_error = true;
            break;
        } else if (avail == 0) {
            break;
        }

        if (copylen > avail) {
            copylen = avail;
        }
        if (copylen > XENSTORE_RING_SIZE - offset) {
            copylen = XENSTORE_RING_SIZE - offset;
        }

        memcpy(&s->xs->rsp[offset], ptr, copylen);
        copied += copylen;

        ptr += copylen;
        len -= copylen;

        prod += copylen;
    }

    /* Ensure the ring contents are seen before rsp_prod update. */
    smp_wmb();

    qatomic_set(&s->xs->rsp_prod, prod);

    return copied;
}

static unsigned int get_req(XenXenstoreState *s)
{
    unsigned int copied = 0;

    if (s->fatal_error) {
        return 0;
    }

    assert(!req_pending(s));

    if (s->req_offset < XENSTORE_HEADER_SIZE) {
        void *ptr = s->req_data + s->req_offset;
        unsigned int len = XENSTORE_HEADER_SIZE;
        unsigned int copylen = copy_from_ring(s, ptr, len);

        copied += copylen;
        s->req_offset += copylen;
    }

    if (s->req_offset >= XENSTORE_HEADER_SIZE) {
        struct xsd_sockmsg *req = (struct xsd_sockmsg *)s->req_data;

        if (req->len > (uint32_t)XENSTORE_PAYLOAD_MAX) {
            error_report("Illegal XenStore request");
            s->fatal_error = true;
            return 0;
        }

        void *ptr = s->req_data + s->req_offset;
        unsigned int len = XENSTORE_HEADER_SIZE + req->len - s->req_offset;
        unsigned int copylen = copy_from_ring(s, ptr, len);

        copied += copylen;
        s->req_offset += copylen;
    }

    return copied;
}

static unsigned int put_rsp(XenXenstoreState *s)
{
    if (s->fatal_error) {
        return 0;
    }

    assert(s->rsp_pending);

    struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
    assert(s->rsp_offset < XENSTORE_HEADER_SIZE + rsp->len);

    void *ptr = s->rsp_data + s->rsp_offset;
    unsigned int len = XENSTORE_HEADER_SIZE + rsp->len - s->rsp_offset;
    unsigned int copylen = copy_to_ring(s, ptr, len);

    s->rsp_offset += copylen;

    /* Have we produced a complete response? */
    if (s->rsp_offset == XENSTORE_HEADER_SIZE + rsp->len) {
        reset_rsp(s);
    }

    return copylen;
}

static void deliver_watch(XenXenstoreState *s, const char *path,
                          const char *token)
{
    struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
    uint8_t *rsp_data = (uint8_t *)&rsp[1];
    unsigned int len;

    assert(!s->rsp_pending);

    trace_xenstore_watch_event(path, token);

    rsp->type = XS_WATCH_EVENT;
    rsp->req_id = 0;
    rsp->tx_id = 0;
    rsp->len = 0;

    len = strlen(path);

    /* XENSTORE_ABS/REL_PATH_MAX should ensure there can be no overflow */
    assert(rsp->len + len < XENSTORE_PAYLOAD_MAX);

    memcpy(&rsp_data[rsp->len], path, len);
    rsp->len += len;
    rsp_data[rsp->len] = '\0';
    rsp->len++;

    len = strlen(token);
    /*
     * It is possible for the guest to have chosen a token that will
     * not fit (along with the path) into a watch event. We have no
     * choice but to drop the event if this is the case.
     */
    if (rsp->len + len >= XENSTORE_PAYLOAD_MAX) {
        return;
    }

    memcpy(&rsp_data[rsp->len], token, len);
    rsp->len += len;
    rsp_data[rsp->len] = '\0';
    rsp->len++;

    s->rsp_pending = true;
}

struct watch_event {
    char *path;
    char *token;
};

static void free_watch_event(struct watch_event *ev)
{
    if (ev) {
        g_free(ev->path);
        g_free(ev->token);
        g_free(ev);
    }
}

static void queue_watch(XenXenstoreState *s, const char *path,
                        const char *token)
{
    struct watch_event *ev = g_new0(struct watch_event, 1);

    ev->path = g_strdup(path);
    ev->token = g_strdup(token);

    s->watch_events = g_list_append(s->watch_events, ev);
}

static void fire_watch_cb(void *opaque, const char *path, const char *token)
{
    XenXenstoreState *s = opaque;

    assert(bql_locked());

    /*
     * If there's a response pending, we obviously can't scribble over
     * it. But if there's a request pending, it has dibs on the buffer
     * too.
     *
     * In the common case of a watch firing due to backend activity
     * when the ring was otherwise idle, we should be able to copy the
     * strings directly into the rsp_data and thence the actual ring,
     * without needing to perform any allocations and queue them.
     */
    if (s->rsp_pending || req_pending(s)) {
        queue_watch(s, path, token);
    } else {
        deliver_watch(s, path, token);
        /*
         * Attempt to queue the message into the actual ring, and send
         * the event channel notification if any bytes are copied.
         */
        if (s->rsp_pending && put_rsp(s) > 0) {
            xen_be_evtchn_notify(s->eh, s->be_port);
        }
    }
}

static void process_watch_events(XenXenstoreState *s)
{
    struct watch_event *ev = s->watch_events->data;

    deliver_watch(s, ev->path, ev->token);

    s->watch_events = g_list_remove(s->watch_events, ev);
    free_watch_event(ev);
}

static void xen_xenstore_event(void *opaque)
{
    XenXenstoreState *s = opaque;
    evtchn_port_t port = xen_be_evtchn_pending(s->eh);
    unsigned int copied_to, copied_from;
    bool processed, notify = false;

    if (port != s->be_port) {
        return;
    }

    /* We know this is a no-op. */
    xen_be_evtchn_unmask(s->eh, port);

    do {
        copied_to = copied_from = 0;
        processed = false;

        if (!s->rsp_pending && s->watch_events) {
            process_watch_events(s);
        }

        if (s->rsp_pending) {
            copied_to = put_rsp(s);
        }

        if (!req_pending(s)) {
            copied_from = get_req(s);
        }

        if (req_pending(s) && !s->rsp_pending && !s->watch_events) {
            process_req(s);
            processed = true;
        }

        notify |= copied_to || copied_from;
    } while (copied_to || copied_from || processed);

    if (notify) {
        xen_be_evtchn_notify(s->eh, s->be_port);
    }
}
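
/*
 * The loop above runs until it can make no further progress: delivering
 * a pending watch event or response frees up the shared buffer, which
 * may allow a queued request to be read and processed, which in turn
 * may generate another response. The guest is notified once at the end
 * if any bytes moved in either direction.
 */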

static void alloc_guest_port(XenXenstoreState *s)
{
    struct evtchn_alloc_unbound alloc = {
        .dom = DOMID_SELF,
        .remote_dom = DOMID_QEMU,
    };

    if (!xen_evtchn_alloc_unbound_op(&alloc)) {
        s->guest_port = alloc.port;
    }
}

int xen_xenstore_reset(void)
{
    XenXenstoreState *s = xen_xenstore_singleton;
    int console_port;
    GList *perms;
    int err;

    if (!s) {
        return -ENOTSUP;
    }

    s->req_offset = s->rsp_offset = 0;
    s->rsp_pending = false;

    if (!memory_region_is_mapped(&s->xenstore_page)) {
        uint64_t gpa = XEN_SPECIAL_PFN(XENSTORE) << TARGET_PAGE_BITS;
        xen_overlay_do_map_page(&s->xenstore_page, gpa);
    }

    alloc_guest_port(s);

    /*
     * As qemu/dom0, bind to the guest's port. For incoming migration, this
     * will be unbound as the guest's evtchn table is overwritten. We then
     * rebind to the correct guest port in xen_xenstore_post_load().
     */
    err = xen_be_evtchn_bind_interdomain(s->eh, xen_domid, s->guest_port);
    if (err < 0) {
        return err;
    }
    s->be_port = err;

    /* Create frontend store nodes */
    perms = g_list_append(NULL, xs_perm_as_string(XS_PERM_NONE, DOMID_QEMU));
    perms = g_list_append(perms, xs_perm_as_string(XS_PERM_READ, xen_domid));

    relpath_printf(s, perms, "store/port", "%u", s->guest_port);
    relpath_printf(s, perms, "store/ring-ref", "%lu",
                   XEN_SPECIAL_PFN(XENSTORE));

    console_port = xen_primary_console_get_port();
    if (console_port) {
        relpath_printf(s, perms, "console/ring-ref", "%lu",
                       XEN_SPECIAL_PFN(CONSOLE));
        relpath_printf(s, perms, "console/port", "%u", console_port);
        relpath_printf(s, perms, "console/state", "%u", XenbusStateInitialised);
    }

    g_list_free_full(perms, g_free);

    /*
     * We don't actually access the guest's page through the grant, because
     * this isn't real Xen, and we can just use the page we gave it in the
     * first place. Map the grant anyway, mostly for cosmetic purposes so
     * it *looks* like it's in use in the guest-visible grant table.
     */
    s->gt = qemu_xen_gnttab_open();
    uint32_t xs_gntref = GNTTAB_RESERVED_XENSTORE;
    s->granted_xs = qemu_xen_gnttab_map_refs(s->gt, 1, xen_domid, &xs_gntref,
                                             PROT_READ | PROT_WRITE);

    return 0;
}

struct qemu_xs_handle {
    XenstoreImplState *impl;
    GList *watches;
    QEMUBH *watch_bh;
};

struct qemu_xs_watch {
    struct qemu_xs_handle *h;
    char *path;
    xs_watch_fn fn;
    void *opaque;
    GList *events;
};

static char *xs_be_get_domain_path(struct qemu_xs_handle *h, unsigned int domid)
{
    return g_strdup_printf("/local/domain/%u", domid);
}

static char **xs_be_directory(struct qemu_xs_handle *h, xs_transaction_t t,
                              const char *path, unsigned int *num)
{
    GList *items = NULL, *l;
    unsigned int i = 0;
    char **items_ret;
    int err;

    err = xs_impl_directory(h->impl, DOMID_QEMU, t, path, NULL, &items);
    if (err) {
        errno = err;
        return NULL;
    }

    items_ret = g_new0(char *, g_list_length(items) + 1);
    *num = 0;
    for (l = items; l; l = l->next) {
        items_ret[i++] = l->data;
        (*num)++;
    }
    g_list_free(items);
    return items_ret;
}

static void *xs_be_read(struct qemu_xs_handle *h, xs_transaction_t t,
                        const char *path, unsigned int *len)
{
    GByteArray *data = g_byte_array_new();
    bool free_segment = false;
    int err;

    err = xs_impl_read(h->impl, DOMID_QEMU, t, path, data);
    if (err) {
        free_segment = true;
        errno = err;
    } else {
        if (len) {
            *len = data->len;
        }
        /* The xen-bus-helper code expects to get NUL terminated string! */
        g_byte_array_append(data, (void *)"", 1);
    }

    return g_byte_array_free(data, free_segment);
}

static bool xs_be_write(struct qemu_xs_handle *h, xs_transaction_t t,
                        const char *path, const void *data, unsigned int len)
{
    GByteArray *gdata = g_byte_array_new();
    int err;

    g_byte_array_append(gdata, data, len);
    err = xs_impl_write(h->impl, DOMID_QEMU, t, path, gdata);
    g_byte_array_unref(gdata);
    if (err) {
        errno = err;
        return false;
    }
    return true;
}

static bool xs_be_create(struct qemu_xs_handle *h, xs_transaction_t t,
                         unsigned int owner, unsigned int domid,
                         unsigned int perms, const char *path)
{
    g_autoptr(GByteArray) data = g_byte_array_new();
    GList *perms_list = NULL;
    int err;

    /* mkdir does this */
    err = xs_impl_read(h->impl, DOMID_QEMU, t, path, data);
    if (err == ENOENT) {
        err = xs_impl_write(h->impl, DOMID_QEMU, t, path, data);
    }
    if (err) {
        errno = err;
        return false;
    }

    perms_list = g_list_append(perms_list,
                               xs_perm_as_string(XS_PERM_NONE, owner));
    perms_list = g_list_append(perms_list,
                               xs_perm_as_string(perms, domid));

    err = xs_impl_set_perms(h->impl, DOMID_QEMU, t, path, perms_list);
    g_list_free_full(perms_list, g_free);
    if (err) {
        errno = err;
        return false;
    }
    return true;
}

static bool xs_be_destroy(struct qemu_xs_handle *h, xs_transaction_t t,
                          const char *path)
{
    int err = xs_impl_rm(h->impl, DOMID_QEMU, t, path);
    if (err) {
        errno = err;
        return false;
    }
    return true;
}

static void be_watch_bh(void *_h)
{
    struct qemu_xs_handle *h = _h;
    GList *l;

    for (l = h->watches; l; l = l->next) {
        struct qemu_xs_watch *w = l->data;

        while (w->events) {
            struct watch_event *ev = w->events->data;

            w->fn(w->opaque, ev->path);

            w->events = g_list_remove(w->events, ev);
            free_watch_event(ev);
        }
    }
}

static void xs_be_watch_cb(void *opaque, const char *path, const char *token)
{
    struct watch_event *ev = g_new0(struct watch_event, 1);
    struct qemu_xs_watch *w = opaque;

    /* We don't care about the token */
    ev->path = g_strdup(path);
    w->events = g_list_append(w->events, ev);

    qemu_bh_schedule(w->h->watch_bh);
}

static struct qemu_xs_watch *xs_be_watch(struct qemu_xs_handle *h,
                                         const char *path, xs_watch_fn fn,
                                         void *opaque)
{
    struct qemu_xs_watch *w = g_new0(struct qemu_xs_watch, 1);
    int err;

    w->h = h;
    w->fn = fn;
    w->opaque = opaque;

    err = xs_impl_watch(h->impl, DOMID_QEMU, path, NULL, xs_be_watch_cb, w);
    if (err) {
        errno = err;
        g_free(w);
        return NULL;
    }

    w->path = g_strdup(path);
    h->watches = g_list_append(h->watches, w);
    return w;
}

static void xs_be_unwatch(struct qemu_xs_handle *h, struct qemu_xs_watch *w)
{
    xs_impl_unwatch(h->impl, DOMID_QEMU, w->path, NULL, xs_be_watch_cb, w);

    h->watches = g_list_remove(h->watches, w);
    g_list_free_full(w->events, (GDestroyNotify)free_watch_event);
    g_free(w->path);
    g_free(w);
}

static xs_transaction_t xs_be_transaction_start(struct qemu_xs_handle *h)
{
    unsigned int new_tx = XBT_NULL;
    int err = xs_impl_transaction_start(h->impl, DOMID_QEMU, &new_tx);
    if (err) {
        errno = err;
        return XBT_NULL;
    }
    return new_tx;
}

static bool xs_be_transaction_end(struct qemu_xs_handle *h, xs_transaction_t t,
                                  bool abort)
{
    int err = xs_impl_transaction_end(h->impl, DOMID_QEMU, t, !abort);
    if (err) {
        errno = err;
        return false;
    }
    return true;
}

static struct qemu_xs_handle *xs_be_open(void)
{
    XenXenstoreState *s = xen_xenstore_singleton;
    struct qemu_xs_handle *h;

    if (!s || !s->impl) {
        errno = ENOSYS;
        return NULL;
    }

    h = g_new0(struct qemu_xs_handle, 1);
    h->impl = s->impl;

    h->watch_bh = aio_bh_new(qemu_get_aio_context(), be_watch_bh, h);

    return h;
}

static void xs_be_close(struct qemu_xs_handle *h)
{
    while (h->watches) {
        struct qemu_xs_watch *w = h->watches->data;
        xs_be_unwatch(h, w);
    }

    qemu_bh_delete(h->watch_bh);
    g_free(h);
}

static struct xenstore_backend_ops emu_xenstore_backend_ops = {
    .open = xs_be_open,
    .close = xs_be_close,
    .get_domain_path = xs_be_get_domain_path,
    .directory = xs_be_directory,
    .read = xs_be_read,
    .write = xs_be_write,
    .create = xs_be_create,
    .destroy = xs_be_destroy,
    .watch = xs_be_watch,
    .unwatch = xs_be_unwatch,
    .transaction_start = xs_be_transaction_start,
    .transaction_end = xs_be_transaction_end,
};
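
/*
 * PV backend code elsewhere in QEMU reaches this table indirectly: once
 * xen_xenstore_realize() has installed it as xen_xenstore_ops, callers
 * go through the qemu_xen_xs_*() wrapper functions declared in
 * hw/xen/xen_backend_ops.h (e.g. qemu_xen_xs_open()).
 */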