xref: /qemu/hw/i386/kvm/xen_xenstore.c (revision 06b40d250ecfa1633209c2e431a7a38acfd03a98)
1 /*
2  * QEMU Xen emulation: Shared/overlay pages support
3  *
4  * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
5  *
6  * Authors: David Woodhouse <dwmw2@infradead.org>
7  *
8  * This work is licensed under the terms of the GNU GPL, version 2 or later.
9  * See the COPYING file in the top-level directory.
10  */
11 
12 #include "qemu/osdep.h"
13 
14 #include "qemu/host-utils.h"
15 #include "qemu/module.h"
16 #include "qemu/main-loop.h"
17 #include "qemu/cutils.h"
18 #include "qemu/error-report.h"
19 #include "qapi/error.h"
20 #include "qom/object.h"
21 #include "migration/vmstate.h"
22 
23 #include "hw/sysbus.h"
24 #include "hw/xen/xen.h"
25 #include "hw/xen/xen_backend_ops.h"
26 #include "xen_overlay.h"
27 #include "xen_evtchn.h"
28 #include "xen_primary_console.h"
29 #include "xen_xenstore.h"
30 
31 #include "system/kvm.h"
32 #include "system/kvm_xen.h"
33 
34 #include "trace.h"
35 
36 #include "xenstore_impl.h"
37 
38 #include "hw/xen/interface/io/xs_wire.h"
39 #include "hw/xen/interface/event_channel.h"
40 #include "hw/xen/interface/grant_table.h"
41 
42 #define TYPE_XEN_XENSTORE "xen-xenstore"
43 OBJECT_DECLARE_SIMPLE_TYPE(XenXenstoreState, XEN_XENSTORE)
44 
45 #define ENTRIES_PER_FRAME_V1 (XEN_PAGE_SIZE / sizeof(grant_entry_v1_t))
46 #define ENTRIES_PER_FRAME_V2 (XEN_PAGE_SIZE / sizeof(grant_entry_v2_t))
47 
48 #define XENSTORE_HEADER_SIZE ((unsigned int)sizeof(struct xsd_sockmsg))
49 
struct XenXenstoreState {
    /*< private >*/
    SysBusDevice busdev;
    /*< public >*/

    /* In-memory xenstore database implementation. */
    XenstoreImplState *impl;
    GList *watch_events; /* for the guest */

    /* RAM page shared with the guest as the xenstore ring. */
    MemoryRegion xenstore_page;
    struct xenstore_domain_interface *xs;
    /* Staging buffers: one full message header plus maximum payload. */
    uint8_t req_data[XENSTORE_HEADER_SIZE + XENSTORE_PAYLOAD_MAX];
    uint8_t rsp_data[XENSTORE_HEADER_SIZE + XENSTORE_PAYLOAD_MAX];
    uint32_t req_offset;   /* bytes of req_data received so far */
    uint32_t rsp_offset;   /* bytes of rsp_data delivered so far */
    bool rsp_pending;      /* a staged response awaits delivery */
    bool fatal_error;      /* set on unrecoverable protocol error (migrated) */

    evtchn_port_t guest_port; /* guest-side xenstore event channel port */
    evtchn_port_t be_port;    /* back-end binding to guest_port */
    struct xenevtchn_handle *eh;

    /* Serialized database; only populated across pre_save/post_load. */
    uint8_t *impl_state;
    uint32_t impl_state_size;

    struct xengntdev_handle *gt;
    void *granted_xs;
};
77 
/* Singleton instance, set in realize (and again in xen_xenstore_create()). */
struct XenXenstoreState *xen_xenstore_singleton;

/* Forward declarations: event-channel handler and watch-fired callback. */
static void xen_xenstore_event(void *opaque);
static void fire_watch_cb(void *opaque, const char *path, const char *token);

static struct xenstore_backend_ops emu_xenstore_backend_ops;
84 
relpath_printf(XenXenstoreState * s,GList * perms,const char * relpath,const char * fmt,...)85 static void G_GNUC_PRINTF (4, 5) relpath_printf(XenXenstoreState *s,
86                                                 GList *perms,
87                                                 const char *relpath,
88                                                 const char *fmt, ...)
89 {
90     gchar *abspath;
91     gchar *value;
92     va_list args;
93     GByteArray *data;
94     int err;
95 
96     abspath = g_strdup_printf("/local/domain/%u/%s", xen_domid, relpath);
97     va_start(args, fmt);
98     value = g_strdup_vprintf(fmt, args);
99     va_end(args);
100 
101     data = g_byte_array_new_take((void *)value, strlen(value));
102 
103     err = xs_impl_write(s->impl, DOMID_QEMU, XBT_NULL, abspath, data);
104     assert(!err);
105 
106     g_byte_array_unref(data);
107 
108     err = xs_impl_set_perms(s->impl, DOMID_QEMU, XBT_NULL, abspath, perms);
109     assert(!err);
110 
111     g_free(abspath);
112 }
113 
/*
 * Realize the emulated xenstore device: allocate the shared ring page,
 * open the back-end event channel, create the xenstore database and
 * populate its default nodes, then install the emulated back-end ops.
 */
static void xen_xenstore_realize(DeviceState *dev, Error **errp)
{
    XenXenstoreState *s = XEN_XENSTORE(dev);
    GList *perms;

    if (xen_mode != XEN_EMULATE) {
        error_setg(errp, "Xen xenstore support is for Xen emulation");
        return;
    }
    memory_region_init_ram(&s->xenstore_page, OBJECT(dev), "xen:xenstore_page",
                           XEN_PAGE_SIZE, &error_abort);
    memory_region_set_enabled(&s->xenstore_page, true);
    s->xs = memory_region_get_ram_ptr(&s->xenstore_page);
    memset(s->xs, 0, XEN_PAGE_SIZE);

    /* We can't map it this early as KVM isn't ready */
    xen_xenstore_singleton = s;

    s->eh = xen_be_evtchn_open();
    if (!s->eh) {
        error_setg(errp, "Xenstore evtchn port init failed");
        return;
    }
    /* Poll the evtchn fd from the main loop; fires xen_xenstore_event(). */
    aio_set_fd_handler(qemu_get_aio_context(), xen_be_evtchn_fd(s->eh),
                       xen_xenstore_event, NULL, NULL, NULL, s);

    s->impl = xs_impl_create(xen_domid);

    /* Populate the default nodes */

    /* Nodes owned by 'dom0' but readable by the guest */
    perms = g_list_append(NULL, xs_perm_as_string(XS_PERM_NONE, DOMID_QEMU));
    perms = g_list_append(perms, xs_perm_as_string(XS_PERM_READ, xen_domid));

    /* The guest's own base directory, /local/domain/<domid> */
    relpath_printf(s, perms, "", "%s", "");

    relpath_printf(s, perms, "domid", "%u", xen_domid);

    relpath_printf(s, perms, "control/platform-feature-xs_reset_watches", "%u", 1);
    relpath_printf(s, perms, "control/platform-feature-multiprocessor-suspend", "%u", 1);

    relpath_printf(s, perms, "platform/acpi", "%u", 1);
    relpath_printf(s, perms, "platform/acpi_s3", "%u", 1);
    relpath_printf(s, perms, "platform/acpi_s4", "%u", 1);
    relpath_printf(s, perms, "platform/acpi_laptop_slate", "%u", 0);

    g_list_free_full(perms, g_free);

    /* Nodes owned by the guest */
    perms = g_list_append(NULL, xs_perm_as_string(XS_PERM_NONE, xen_domid));

    relpath_printf(s, perms, "attr", "%s", "");

    relpath_printf(s, perms, "control/shutdown", "%s", "");
    relpath_printf(s, perms, "control/feature-poweroff", "%u", 1);
    relpath_printf(s, perms, "control/feature-reboot", "%u", 1);
    relpath_printf(s, perms, "control/feature-suspend", "%u", 1);
    relpath_printf(s, perms, "control/feature-s3", "%u", 1);
    relpath_printf(s, perms, "control/feature-s4", "%u", 1);

    relpath_printf(s, perms, "data", "%s", "");
    relpath_printf(s, perms, "device", "%s", "");
    relpath_printf(s, perms, "drivers", "%s", "");
    relpath_printf(s, perms, "error", "%s", "");
    relpath_printf(s, perms, "feature", "%s", "");

    g_list_free_full(perms, g_free);

    xen_xenstore_ops = &emu_xenstore_backend_ops;
}
184 
/* VMState .needed callback: migrate this section only under Xen emulation. */
static bool xen_xenstore_is_needed(void *opaque)
{
    return xen_mode == XEN_EMULATE;
}
189 
xen_xenstore_pre_save(void * opaque)190 static int xen_xenstore_pre_save(void *opaque)
191 {
192     XenXenstoreState *s = opaque;
193     GByteArray *save;
194 
195     if (s->eh) {
196         s->guest_port = xen_be_evtchn_get_guest_port(s->eh);
197     }
198 
199     g_free(s->impl_state);
200     save = xs_impl_serialize(s->impl);
201     s->impl_state = save->data;
202     s->impl_state_size = save->len;
203     g_byte_array_free(save, false);
204 
205     return 0;
206 }
207 
/*
 * VMState .post_load callback: re-bind the back-end event channel to the
 * guest's port and rebuild the xenstore database from the migrated blob.
 */
static int xen_xenstore_post_load(void *opaque, int ver)
{
    XenXenstoreState *s = opaque;
    GByteArray *save;

    /*
     * As qemu/dom0, rebind to the guest's port. The Windows drivers may
     * unbind the XenStore evtchn and rebind to it, having obtained the
     * "remote" port through EVTCHNOP_status. In the case that migration
     * occurs while it's unbound, the "remote" port needs to be the same
     * as before so that the guest can find it, but should remain unbound.
     */
    if (s->guest_port) {
        int be_port = xen_be_evtchn_bind_interdomain(s->eh, xen_domid,
                                                     s->guest_port);
        if (be_port < 0) {
            return be_port;
        }
        s->be_port = be_port;
    }

    /* Hand ownership of the migrated blob to the byte array (and on to
     * the deserializer); clear our pointers so it isn't freed twice. */
    save = g_byte_array_new_take(s->impl_state, s->impl_state_size);
    s->impl_state = NULL;
    s->impl_state_size = 0;

    return xs_impl_deserialize(s->impl, save, xen_domid, fire_watch_cb, s);
}
235 
/* Migration description: ring buffers, offsets, ports and the serialized
 * database blob (variable-length, sized by impl_state_size). */
static const VMStateDescription xen_xenstore_vmstate = {
    .name = "xen_xenstore",
    .unmigratable = 1, /* The PV back ends don't migrate yet */
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = xen_xenstore_is_needed,
    .pre_save = xen_xenstore_pre_save,
    .post_load = xen_xenstore_post_load,
    .fields = (const VMStateField[]) {
        VMSTATE_UINT8_ARRAY(req_data, XenXenstoreState,
                            sizeof_field(XenXenstoreState, req_data)),
        VMSTATE_UINT8_ARRAY(rsp_data, XenXenstoreState,
                            sizeof_field(XenXenstoreState, rsp_data)),
        VMSTATE_UINT32(req_offset, XenXenstoreState),
        VMSTATE_UINT32(rsp_offset, XenXenstoreState),
        VMSTATE_BOOL(rsp_pending, XenXenstoreState),
        VMSTATE_UINT32(guest_port, XenXenstoreState),
        VMSTATE_BOOL(fatal_error, XenXenstoreState),
        VMSTATE_UINT32(impl_state_size, XenXenstoreState),
        VMSTATE_VARRAY_UINT32_ALLOC(impl_state, XenXenstoreState,
                                    impl_state_size, 0,
                                    vmstate_info_uint8, uint8_t),
        VMSTATE_END_OF_LIST()
    }
};
261 
/* QOM class init: hook up realize and the migration description. */
static void xen_xenstore_class_init(ObjectClass *klass, const void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);

    dc->vmsd = &xen_xenstore_vmstate;
    dc->realize = xen_xenstore_realize;
}
269 
/* QOM type registration record for the xen-xenstore sysbus device. */
static const TypeInfo xen_xenstore_info = {
    .name          = TYPE_XEN_XENSTORE,
    .parent        = TYPE_SYS_BUS_DEVICE,
    .instance_size = sizeof(XenXenstoreState),
    .class_init    = xen_xenstore_class_init,
};
276 
xen_xenstore_create(void)277 void xen_xenstore_create(void)
278 {
279     DeviceState *dev = sysbus_create_simple(TYPE_XEN_XENSTORE, -1, NULL);
280 
281     xen_xenstore_singleton = XEN_XENSTORE(dev);
282 
283     /*
284      * Defer the init (xen_xenstore_reset()) until KVM is set up and the
285      * overlay page can be mapped.
286      */
287 }
288 
/* Register the xen-xenstore type with QOM at module-init time. */
static void xen_xenstore_register_types(void)
{
    type_register_static(&xen_xenstore_info);
}

type_init(xen_xenstore_register_types)
295 
296 uint16_t xen_xenstore_get_port(void)
297 {
298     XenXenstoreState *s = xen_xenstore_singleton;
299     if (!s) {
300         return 0;
301     }
302     return s->guest_port;
303 }
304 
/*
 * True once a complete request is buffered: the header has arrived and
 * req_offset has reached the payload length the header advertises.
 */
static bool req_pending(XenXenstoreState *s)
{
    struct xsd_sockmsg *hdr = (struct xsd_sockmsg *)s->req_data;

    return s->req_offset == XENSTORE_HEADER_SIZE + hdr->len;
}
311 
/* Discard any partially-received request and start afresh. */
static void reset_req(XenXenstoreState *s)
{
    s->req_offset = 0;
    memset(s->req_data, 0, sizeof(s->req_data));
}
317 
/* Drop the staged response and mark the response buffer free. */
static void reset_rsp(XenXenstoreState *s)
{
    s->rsp_offset = 0;
    s->rsp_pending = false;
    memset(s->rsp_data, 0, sizeof(s->rsp_data));
}
325 
/*
 * Stage an XS_ERROR response carrying the protocol's textual name for
 * @errnum (e.g. "EINVAL").  The errno value must have an entry in the
 * xsd_errors table; anything else is a bug in the caller.
 */
static void xs_error(XenXenstoreState *s, unsigned int id,
                     xs_transaction_t tx_id, int errnum)
{
    struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
    const char *errstr = NULL;
    unsigned int i;

    for (i = 0; i < ARRAY_SIZE(xsd_errors) && !errstr; i++) {
        if (xsd_errors[i].errnum == errnum) {
            errstr = xsd_errors[i].errstring;
        }
    }
    assert(errstr);

    trace_xenstore_error(id, tx_id, errstr);

    rsp->type = XS_ERROR;
    rsp->req_id = id;
    rsp->tx_id = tx_id;
    rsp->len = (uint32_t)strlen(errstr) + 1;

    /* Payload is the error name, including its NUL terminator. */
    memcpy(&rsp[1], errstr, rsp->len);
}
351 
/*
 * Stage a success response of the given @type whose payload is the
 * conventional "OK" string (NUL-terminated, per the xenstore protocol).
 */
static void xs_ok(XenXenstoreState *s, unsigned int type, unsigned int req_id,
                  xs_transaction_t tx_id)
{
    static const char okstr[] = "OK";
    struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;

    rsp->type = type;
    rsp->req_id = req_id;
    rsp->tx_id = tx_id;
    rsp->len = sizeof(okstr); /* "OK" plus the NUL terminator */

    memcpy(&rsp[1], okstr, rsp->len);
}
365 
366 /*
367  * The correct request and response formats are documented in xen.git:
368  * docs/misc/xenstore.txt. A summary is given below for convenience.
369  * The '|' symbol represents a NUL character.
370  *
371  * ---------- Database read, write and permissions operations ----------
372  *
373  * READ                    <path>|                 <value|>
374  * WRITE                   <path>|<value|>
375  *         Store and read the octet string <value> at <path>.
376  *         WRITE creates any missing parent paths, with empty values.
377  *
378  * MKDIR                   <path>|
 *         Ensures that the <path> exists, if necessary by creating
 *         it and any missing parents with empty values.  If <path>
381  *         or any parent already exists, its value is left unchanged.
382  *
383  * RM                      <path>|
384  *         Ensures that the <path> does not exist, by deleting
385  *         it and all of its children.  It is not an error if <path> does
386  *         not exist, but it _is_ an error if <path>'s immediate parent
387  *         does not exist either.
388  *
389  * DIRECTORY               <path>|                 <child-leaf-name>|*
390  *         Gives a list of the immediate children of <path>, as only the
391  *         leafnames.  The resulting children are each named
392  *         <path>/<child-leaf-name>.
393  *
394  * DIRECTORY_PART          <path>|<offset>         <gencnt>|<child-leaf-name>|*
395  *         Same as DIRECTORY, but to be used for children lists longer than
396  *         XENSTORE_PAYLOAD_MAX. Input are <path> and the byte offset into
397  *         the list of children to return. Return values are the generation
398  *         count <gencnt> of the node (to be used to ensure the node hasn't
399  *         changed between two reads: <gencnt> being the same for multiple
400  *         reads guarantees the node hasn't changed) and the list of children
401  *         starting at the specified <offset> of the complete list.
402  *
403  * GET_PERMS               <path>|                 <perm-as-string>|+
404  * SET_PERMS               <path>|<perm-as-string>|+?
405  *         <perm-as-string> is one of the following
406  *                 w<domid>        write only
407  *                 r<domid>        read only
408  *                 b<domid>        both read and write
409  *                 n<domid>        no access
410  *         See https://wiki.xen.org/wiki/XenBus section
411  *         `Permissions' for details of the permissions system.
412  *         It is possible to set permissions for the special watch paths
413  *         "@introduceDomain" and "@releaseDomain" to enable receiving those
414  *         watches in unprivileged domains.
415  *
416  * ---------- Watches ----------
417  *
418  * WATCH                   <wpath>|<token>|?
419  *         Adds a watch.
420  *
421  *         When a <path> is modified (including path creation, removal,
422  *         contents change or permissions change) this generates an event
423  *         on the changed <path>.  Changes made in transactions cause an
424  *         event only if and when committed.  Each occurring event is
425  *         matched against all the watches currently set up, and each
426  *         matching watch results in a WATCH_EVENT message (see below).
427  *
 *         The event's path matches the watch's <wpath> if it is a child
 *         of <wpath>.
430  *
431  *         <wpath> can be a <path> to watch or @<wspecial>.  In the
432  *         latter case <wspecial> may have any syntax but it matches
433  *         (according to the rules above) only the following special
434  *         events which are invented by xenstored:
435  *             @introduceDomain    occurs on INTRODUCE
436  *             @releaseDomain      occurs on any domain crash or
437  *                                 shutdown, and also on RELEASE
438  *                                 and domain destruction
439  *         <wspecial> events are sent to privileged callers or explicitly
440  *         via SET_PERMS enabled domains only.
441  *
442  *         When a watch is first set up it is triggered once straight
443  *         away, with <path> equal to <wpath>.  Watches may be triggered
444  *         spuriously.  The tx_id in a WATCH request is ignored.
445  *
446  *         Watches are supposed to be restricted by the permissions
447  *         system but in practice the implementation is imperfect.
448  *         Applications should not rely on being sent a notification for
449  *         paths that they cannot read; however, an application may rely
450  *         on being sent a watch when a path which it _is_ able to read
451  *         is deleted even if that leaves only a nonexistent unreadable
 *         parent.  A notification may be omitted if a node's permissions
 *         are changed so as to make it unreadable, in which case future
454  *         notifications may be suppressed (and if the node is later made
455  *         readable, some notifications may have been lost).
456  *
457  * WATCH_EVENT                                     <epath>|<token>|
458  *         Unsolicited `reply' generated for matching modification events
459  *         as described above.  req_id and tx_id are both 0.
460  *
461  *         <epath> is the event's path, ie the actual path that was
462  *         modified; however if the event was the recursive removal of an
463  *         parent of <wpath>, <epath> is just
464  *         <wpath> (rather than the actual path which was removed).  So
465  *         <epath> is a child of <wpath>, regardless.
466  *
467  *         Iff <wpath> for the watch was specified as a relative pathname,
468  *         the <epath> path will also be relative (with the same base,
469  *         obviously).
470  *
471  * UNWATCH                 <wpath>|<token>|?
472  *
473  * RESET_WATCHES           |
474  *         Reset all watches and transactions of the caller.
475  *
476  * ---------- Transactions ----------
477  *
478  * TRANSACTION_START       |                       <transid>|
479  *         <transid> is an opaque uint32_t allocated by xenstored
480  *         represented as unsigned decimal.  After this, transaction may
481  *         be referenced by using <transid> (as 32-bit binary) in the
482  *         tx_id request header field.  When transaction is started whole
483  *         db is copied; reads and writes happen on the copy.
484  *         It is not legal to send non-0 tx_id in TRANSACTION_START.
485  *
486  * TRANSACTION_END         T|
487  * TRANSACTION_END         F|
488  *         tx_id must refer to existing transaction.  After this
489  *         request the tx_id is no longer valid and may be reused by
490  *         xenstore.  If F, the transaction is discarded.  If T,
 *         it is committed: if there were any other intervening writes
 *         then our END gets EAGAIN.
493  *
494  *         The plan is that in the future only intervening `conflicting'
495  *         writes cause EAGAIN, meaning only writes or other commits
496  *         which changed paths which were read or written in the
497  *         transaction at hand.
498  *
499  */
500 
/*
 * XS_READ: payload is a single NUL-terminated path; the response payload
 * is the node's value (no terminator added).  Values larger than
 * XENSTORE_PAYLOAD_MAX are rejected with E2BIG.
 */
static void xs_read(XenXenstoreState *s, unsigned int req_id,
                    xs_transaction_t tx_id, uint8_t *req_data, unsigned int len)
{
    struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
    uint8_t *rsp_data = (uint8_t *)&rsp[1];
    const char *path = (const char *)req_data;
    g_autoptr(GByteArray) data = g_byte_array_new();
    int err;

    /* The request must be exactly one NUL-terminated string. */
    if (len == 0 || req_data[len - 1] != '\0') {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    trace_xenstore_read(tx_id, path);
    err = xs_impl_read(s->impl, xen_domid, tx_id, path, data);
    if (err) {
        xs_error(s, req_id, tx_id, err);
        return;
    }

    rsp->type = XS_READ;
    rsp->req_id = req_id;
    rsp->tx_id = tx_id;
    rsp->len = 0;

    if (data->len > XENSTORE_PAYLOAD_MAX) {
        /* Overwrites the header staged above with an XS_ERROR. */
        xs_error(s, req_id, tx_id, E2BIG);
        return;
    }

    if (data->len) {
        memcpy(rsp_data, data->data, data->len);
        rsp->len = data->len;
    }
}
540 
/*
 * XS_WRITE: payload is <path>|<value>, where '|' is a NUL separator and
 * <value> (possibly empty) runs to the end of the payload.  A payload
 * with no NUL at all is malformed.
 */
static void xs_write(XenXenstoreState *s, unsigned int req_id,
                     xs_transaction_t tx_id, uint8_t *req_data,
                     unsigned int len)
{
    g_autoptr(GByteArray) data = g_byte_array_new();
    const char *path = (const char *)req_data;
    const uint8_t *sep;
    int err;

    if (len == 0) {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    /* Locate the NUL that terminates the path. */
    sep = memchr(req_data, '\0', len);
    if (!sep) {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    /* Everything after the separator is the value (may be empty). */
    g_byte_array_append(data, sep + 1, len - (unsigned int)(sep - req_data) - 1);

    trace_xenstore_write(tx_id, path);
    err = xs_impl_write(s->impl, xen_domid, tx_id, path, data);
    if (err) {
        xs_error(s, req_id, tx_id, err);
        return;
    }

    xs_ok(s, XS_WRITE, req_id, tx_id);
}
577 
/*
 * XS_MKDIR: ensure the NUL-terminated <path> exists, creating it with an
 * empty value if necessary.  An already-existing node is left untouched.
 */
static void xs_mkdir(XenXenstoreState *s, unsigned int req_id,
                     xs_transaction_t tx_id, uint8_t *req_data,
                     unsigned int len)
{
    g_autoptr(GByteArray) data = g_byte_array_new();
    const char *path;
    int err;

    if (len == 0 || req_data[len - 1] != '\0') {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    path = (const char *)req_data;

    trace_xenstore_mkdir(tx_id, path);
    /* Probe for the node; only write (an empty value) if it's absent. */
    err = xs_impl_read(s->impl, xen_domid, tx_id, path, data);
    if (err == ENOENT) {
        err = xs_impl_write(s->impl, xen_domid, tx_id, path, data);
    }

    /*
     * Fix: the error test was inverted ("if (!err)"), which reported every
     * successful MKDIR as an error — and xs_error() asserts when looking up
     * errnum 0, which has no entry in xsd_errors — while real failures were
     * acknowledged with xs_ok().  Report the error only when err != 0, as
     * every other request handler in this file does.
     */
    if (err) {
        xs_error(s, req_id, tx_id, err);
        return;
    }

    xs_ok(s, XS_MKDIR, req_id, tx_id);
}
606 
/*
 * Append a list of NUL-terminated strings to the staged response payload.
 *
 * @start: byte offset into the concatenated list at which copying begins
 *         (used by XS_DIRECTORY_PART; 0 elsewhere).
 * @truncate: if true, stop silently at XENSTORE_PAYLOAD_MAX (DIRECTORY_PART
 *         semantics, including the trailing end-of-list NUL); if false,
 *         overflow replaces the staged response with an E2BIG error.
 */
static void xs_append_strings(XenXenstoreState *s, struct xsd_sockmsg *rsp,
                              GList *strings, unsigned int start, bool truncate)
{
    uint8_t *rsp_data = (uint8_t *)&rsp[1];
    GList *l;

    for (l = strings; l; l = l->next) {
        size_t len = strlen(l->data) + 1; /* Including the NUL termination */
        char *str = l->data;

        if (rsp->len + len > XENSTORE_PAYLOAD_MAX) {
            if (truncate) {
                /* Copy only what still fits; stop once the payload is full. */
                len = XENSTORE_PAYLOAD_MAX - rsp->len;
                if (!len) {
                    return;
                }
            } else {
                xs_error(s, rsp->req_id, rsp->tx_id, E2BIG);
                return;
            }
        }

        if (start) {
            /* Still consuming the requested starting offset. */
            if (start >= len) {
                start -= len;
                continue;
            }

            /* Offset lands inside this string: copy its tail only. */
            str += start;
            len -= start;
            start = 0;
        }

        memcpy(&rsp_data[rsp->len], str, len);
        rsp->len += len;
    }
    /* XS_DIRECTORY_PART wants an extra NUL to indicate the end */
    if (truncate && rsp->len < XENSTORE_PAYLOAD_MAX) {
        rsp_data[rsp->len++] = '\0';
    }
}
648 
/*
 * XS_DIRECTORY: payload is a NUL-terminated path; the response is the
 * NUL-separated leaf names of the path's immediate children.  A listing
 * that exceeds XENSTORE_PAYLOAD_MAX is reported as E2BIG.
 */
static void xs_directory(XenXenstoreState *s, unsigned int req_id,
                         xs_transaction_t tx_id, uint8_t *req_data,
                         unsigned int len)
{
    struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
    const char *path = (const char *)req_data;
    GList *children = NULL;
    int err;

    if (len == 0 || req_data[len - 1] != '\0') {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    trace_xenstore_directory(tx_id, path);
    err = xs_impl_directory(s->impl, xen_domid, tx_id, path, NULL, &children);
    if (err) {
        xs_error(s, req_id, tx_id, err);
        return;
    }

    rsp->type = XS_DIRECTORY;
    rsp->req_id = req_id;
    rsp->tx_id = tx_id;
    rsp->len = 0;

    xs_append_strings(s, rsp, children, 0, false);

    g_list_free_full(children, g_free);
}
681 
/*
 * XS_DIRECTORY_PART: payload is <path>|<offset>| where <offset> is the
 * decimal byte offset into the full child listing.  The response is the
 * node's generation count followed by as much of the listing as fits,
 * truncated at XENSTORE_PAYLOAD_MAX (see xs_append_strings).
 */
static void xs_directory_part(XenXenstoreState *s, unsigned int req_id,
                              xs_transaction_t tx_id, uint8_t *req_data,
                              unsigned int len)
{
    const char *offset_str, *path = (const char *)req_data;
    struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
    char *rsp_data = (char *)&rsp[1];
    uint64_t gencnt = 0;
    unsigned int offset;
    GList *items = NULL;
    int err;

    if (len == 0) {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    /* Skip past the path; a payload without its NUL is malformed. */
    while (len--) {
        if (*req_data++ == '\0') {
            break;
        }
        if (len == 0) {
            xs_error(s, req_id, tx_id, EINVAL);
            return;
        }
    }

    /* Skip past the offset string, which must also be NUL-terminated. */
    offset_str = (const char *)req_data;
    while (len--) {
        if (*req_data++ == '\0') {
            break;
        }
        if (len == 0) {
            xs_error(s, req_id, tx_id, EINVAL);
            return;
        }
    }

    /* Anything after the two strings is unexpected. */
    if (len) {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    if (qemu_strtoui(offset_str, NULL, 10, &offset) < 0) {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    trace_xenstore_directory_part(tx_id, path, offset);
    err = xs_impl_directory(s->impl, xen_domid, tx_id, path, &gencnt, &items);
    if (err != 0) {
        xs_error(s, req_id, tx_id, err);
        return;
    }

    rsp->type = XS_DIRECTORY_PART;
    rsp->req_id = req_id;
    rsp->tx_id = tx_id;
    /* Response begins with the generation count, NUL-terminated. */
    rsp->len = snprintf(rsp_data, XENSTORE_PAYLOAD_MAX, "%" PRIu64, gencnt) + 1;

    xs_append_strings(s, rsp, items, offset, true);

    g_list_free_full(items, g_free);
}
746 
/*
 * XS_TRANSACTION_START: payload must be a single empty string.  On
 * success, the response payload is the new transaction id in decimal.
 */
static void xs_transaction_start(XenXenstoreState *s, unsigned int req_id,
                                 xs_transaction_t tx_id, uint8_t *req_data,
                                 unsigned int len)
{
    struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
    char *rsp_data = (char *)&rsp[1];
    int err;

    if (len != 1 || req_data[0] != '\0') {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    /* Stage the success header first; a failure below overwrites it. */
    rsp->type = XS_TRANSACTION_START;
    rsp->req_id = req_id;
    rsp->tx_id = tx_id;
    rsp->len = 0;

    err = xs_impl_transaction_start(s->impl, xen_domid, &tx_id);
    if (err) {
        xs_error(s, req_id, tx_id, err);
        return;
    }

    trace_xenstore_transaction_start(tx_id);

    rsp->len = snprintf(rsp_data, XENSTORE_PAYLOAD_MAX, "%u", tx_id);
    assert(rsp->len < XENSTORE_PAYLOAD_MAX);
    rsp->len++; /* include the NUL terminator */
}
777 
/*
 * XS_TRANSACTION_END: payload must be exactly "T\0" (commit) or "F\0"
 * (abort) and tx_id must name an existing transaction.
 */
static void xs_transaction_end(XenXenstoreState *s, unsigned int req_id,
                               xs_transaction_t tx_id, uint8_t *req_data,
                               unsigned int len)
{
    bool commit;
    int err;

    if (len != 2 || req_data[1] != '\0') {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    if (req_data[0] == 'T') {
        commit = true;
    } else if (req_data[0] == 'F') {
        commit = false;
    } else {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    trace_xenstore_transaction_end(tx_id, commit);
    err = xs_impl_transaction_end(s->impl, xen_domid, tx_id, commit);
    if (err) {
        xs_error(s, req_id, tx_id, err);
        return;
    }

    xs_ok(s, XS_TRANSACTION_END, req_id, tx_id);
}
811 
/*
 * XS_RM: delete the NUL-terminated <path> and all of its children.
 */
static void xs_rm(XenXenstoreState *s, unsigned int req_id,
                  xs_transaction_t tx_id, uint8_t *req_data, unsigned int len)
{
    const char *path = (const char *)req_data;
    int err;

    if (!len || req_data[len - 1] != '\0') {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    trace_xenstore_rm(tx_id, path);
    err = xs_impl_rm(s->impl, xen_domid, tx_id, path);
    if (!err) {
        xs_ok(s, XS_RM, req_id, tx_id);
    } else {
        xs_error(s, req_id, tx_id, err);
    }
}
832 
/*
 * XS_GET_PERMS: payload is a NUL-terminated path; the response is the
 * node's permission strings, NUL-separated.
 */
static void xs_get_perms(XenXenstoreState *s, unsigned int req_id,
                         xs_transaction_t tx_id, uint8_t *req_data,
                         unsigned int len)
{
    struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
    const char *path = (const char *)req_data;
    GList *perms = NULL;
    int err;

    if (!len || req_data[len - 1] != '\0') {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    trace_xenstore_get_perms(tx_id, path);
    err = xs_impl_get_perms(s->impl, xen_domid, tx_id, path, &perms);
    if (err) {
        xs_error(s, req_id, tx_id, err);
        return;
    }

    rsp->type = XS_GET_PERMS;
    rsp->req_id = req_id;
    rsp->tx_id = tx_id;
    rsp->len = 0;

    xs_append_strings(s, rsp, perms, 0, false);

    g_list_free_full(perms, g_free);
}
863 
/*
 * XS_SET_PERMS: the payload is <path>|<perm-as-string>|+? — a
 * NUL-terminated path followed by zero or more NUL-terminated
 * permission strings.
 */
static void xs_set_perms(XenXenstoreState *s, unsigned int req_id,
                         xs_transaction_t tx_id, uint8_t *req_data,
                         unsigned int len)
{
    const char *path = (const char *)req_data;
    GList *perms = NULL;
    uint8_t *nul;
    int err;

    if (len == 0) {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    /* The path itself must be NUL-terminated within the payload. */
    nul = memchr(req_data, '\0', len);
    if (!nul) {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }
    len -= (nul + 1) - req_data;
    req_data = nul + 1;

    /* Every complete NUL-terminated string that follows is a permission. */
    while (len && (nul = memchr(req_data, '\0', len)) != NULL) {
        perms = g_list_append(perms, req_data);
        len -= (nul + 1) - req_data;
        req_data = nul + 1;
    }

    /*
     * Note that there may be trailing garbage at the end of the buffer.
     * This is explicitly permitted by the '?' at the end of the definition:
     *
     *    SET_PERMS         <path>|<perm-as-string>|+?
     */

    trace_xenstore_set_perms(tx_id, path);

    err = xs_impl_set_perms(s->impl, xen_domid, tx_id, path, perms);
    g_list_free(perms);
    if (err) {
        xs_error(s, req_id, tx_id, err);
        return;
    }

    xs_ok(s, XS_SET_PERMS, req_id, tx_id);
}
913 
/*
 * XS_WATCH: register a watch. The payload is <wpath>|<token>|? — a
 * NUL-terminated path followed by a NUL-terminated token.
 */
static void xs_watch(XenXenstoreState *s, unsigned int req_id,
                     xs_transaction_t tx_id, uint8_t *req_data,
                     unsigned int len)
{
    const char *path = (const char *)req_data;
    const char *token;
    uint8_t *nul;
    int err;

    if (len == 0) {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    /* The watch path must be NUL-terminated within the payload. */
    nul = memchr(req_data, '\0', len);
    if (!nul) {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }
    len -= (nul + 1) - req_data;
    req_data = nul + 1;

    /* As must the token, if any bytes remain for it. */
    token = (const char *)req_data;
    if (len && !memchr(req_data, '\0', len)) {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    /*
     * Note that there may be trailing garbage at the end of the buffer.
     * This is explicitly permitted by the '?' at the end of the definition:
     *
     *    WATCH             <wpath>|<token>|?
     */

    trace_xenstore_watch(path, token);
    err = xs_impl_watch(s->impl, xen_domid, path, token, fire_watch_cb, s);
    if (err) {
        xs_error(s, req_id, tx_id, err);
        return;
    }

    xs_ok(s, XS_WATCH, req_id, tx_id);
}
963 
/*
 * XS_UNWATCH: remove a previously-registered watch. The payload layout
 * matches XS_WATCH: <wpath>|<token>|.
 */
static void xs_unwatch(XenXenstoreState *s, unsigned int req_id,
                       xs_transaction_t tx_id, uint8_t *req_data,
                       unsigned int len)
{
    const char *path = (const char *)req_data;
    const char *token;
    uint8_t *nul;
    int err;

    if (len == 0) {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    /* The path must be NUL-terminated within the payload. */
    nul = memchr(req_data, '\0', len);
    if (!nul) {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }
    len -= (nul + 1) - req_data;
    req_data = nul + 1;

    /* As must the token, if any bytes remain for it. */
    token = (const char *)req_data;
    if (len && !memchr(req_data, '\0', len)) {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    trace_xenstore_unwatch(path, token);
    err = xs_impl_unwatch(s->impl, xen_domid, path, token, fire_watch_cb, s);
    if (err) {
        xs_error(s, req_id, tx_id, err);
        return;
    }

    xs_ok(s, XS_UNWATCH, req_id, tx_id);
}
1006 
/*
 * XS_RESET_WATCHES: drop all watches registered by this domain. The
 * (unused) payload must still be a NUL-terminated string.
 */
static void xs_reset_watches(XenXenstoreState *s, unsigned int req_id,
                             xs_transaction_t tx_id, uint8_t *req_data,
                             unsigned int len)
{
    bool terminated = len != 0 && req_data[len - 1] == '\0';

    if (!terminated) {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    trace_xenstore_reset_watches();
    xs_impl_reset_watches(s->impl, xen_domid);

    xs_ok(s, XS_RESET_WATCHES, req_id, tx_id);
}
1021 
/*
 * Handler for operations a guest may not perform: always fail with
 * EACCES. Used for privileged (dom0-only) request types in xsd_reqs[].
 */
static void xs_priv(XenXenstoreState *s, unsigned int req_id,
                    xs_transaction_t tx_id, uint8_t *data,
                    unsigned int len)
{
    xs_error(s, req_id, tx_id, EACCES);
}
1028 
/*
 * Handler for request types with no implementation: always fail with
 * ENOSYS. Also the fallback for out-of-range or unhandled types.
 */
static void xs_unimpl(XenXenstoreState *s, unsigned int req_id,
                      xs_transaction_t tx_id, uint8_t *data,
                      unsigned int len)
{
    xs_error(s, req_id, tx_id, ENOSYS);
}
1035 
/* Common signature shared by all the request handlers above. */
typedef void (*xs_impl)(XenXenstoreState *s, unsigned int req_id,
                        xs_transaction_t tx_id, uint8_t *data,
                        unsigned int len);

/* One dispatch-table entry: request name (for debug) plus its handler. */
struct xsd_req {
    const char *name;
    xs_impl fn;
};
/* Designated initializer keyed on the XS_* request type value. */
#define XSD_REQ(_type, _fn)                           \
    [_type] = { .name = #_type, .fn = _fn }
1046 
/*
 * Dispatch table indexed by xsd_sockmsg.type. Types not listed here
 * (or listed with a NULL fn) fall through to xs_unimpl(); privileged
 * operations are mapped to xs_priv() so guests get EACCES.
 */
struct xsd_req xsd_reqs[] = {
    XSD_REQ(XS_READ, xs_read),
    XSD_REQ(XS_WRITE, xs_write),
    XSD_REQ(XS_MKDIR, xs_mkdir),
    XSD_REQ(XS_DIRECTORY, xs_directory),
    XSD_REQ(XS_DIRECTORY_PART, xs_directory_part),
    XSD_REQ(XS_TRANSACTION_START, xs_transaction_start),
    XSD_REQ(XS_TRANSACTION_END, xs_transaction_end),
    XSD_REQ(XS_RM, xs_rm),
    XSD_REQ(XS_GET_PERMS, xs_get_perms),
    XSD_REQ(XS_SET_PERMS, xs_set_perms),
    XSD_REQ(XS_WATCH, xs_watch),
    XSD_REQ(XS_UNWATCH, xs_unwatch),
    XSD_REQ(XS_CONTROL, xs_priv),
    XSD_REQ(XS_INTRODUCE, xs_priv),
    XSD_REQ(XS_RELEASE, xs_priv),
    XSD_REQ(XS_IS_DOMAIN_INTRODUCED, xs_priv),
    XSD_REQ(XS_RESUME, xs_priv),
    XSD_REQ(XS_SET_TARGET, xs_priv),
    XSD_REQ(XS_RESET_WATCHES, xs_reset_watches),
};
1068 
/*
 * Dispatch the complete request sitting in s->req_data to its handler,
 * then mark the response pending and reset the request buffer.
 */
static void process_req(XenXenstoreState *s)
{
    struct xsd_sockmsg *req = (struct xsd_sockmsg *)s->req_data;
    xs_impl handler = xs_unimpl;

    assert(req_pending(s));
    assert(!s->rsp_pending);

    /* Unknown or unregistered types keep the xs_unimpl fallback. */
    if (req->type < ARRAY_SIZE(xsd_reqs) && xsd_reqs[req->type].fn) {
        handler = xsd_reqs[req->type].fn;
    }

    /* The payload immediately follows the header in the buffer. */
    handler(s, req->req_id, req->tx_id, (uint8_t *)(req + 1), req->len);

    s->rsp_pending = true;
    reset_req(s);
}
1089 
/*
 * Pull up to @len bytes of request data from the shared ring into @ptr.
 *
 * Returns the number of bytes actually copied, which may be less than
 * @len if the ring runs dry first. Sets s->fatal_error and stops if the
 * guest-controlled indices are inconsistent.
 */
static unsigned int copy_from_ring(XenXenstoreState *s, uint8_t *ptr,
                                   unsigned int len)
{
    if (!len) {
        return 0;
    }

    XENSTORE_RING_IDX prod = qatomic_read(&s->xs->req_prod);
    XENSTORE_RING_IDX cons = qatomic_read(&s->xs->req_cons);
    unsigned int copied = 0;

    /* Ensure the ring contents don't cross the req_prod access. */
    smp_rmb();

    while (len) {
        unsigned int avail = prod - cons;
        unsigned int offset = MASK_XENSTORE_IDX(cons);
        unsigned int copylen = avail;

        /* The guest writes req_prod; more than a ring's worth is bogus. */
        if (avail > XENSTORE_RING_SIZE) {
            error_report("XenStore ring handling error");
            s->fatal_error = true;
            break;
        } else if (avail == 0) {
            break;
        }

        /* Clamp to the caller's buffer and to the ring wrap point. */
        if (copylen > len) {
            copylen = len;
        }
        if (copylen > XENSTORE_RING_SIZE - offset) {
            copylen = XENSTORE_RING_SIZE - offset;
        }

        memcpy(ptr, &s->xs->req[offset], copylen);
        copied += copylen;

        ptr += copylen;
        len -= copylen;

        cons += copylen;
    }

    /*
     * Not sure this ever mattered except on Alpha, but this barrier
     * is to ensure that the update to req_cons is globally visible
     * only after we have consumed all the data from the ring, and we
     * don't end up seeing data written to the ring *after* the other
     * end sees the update and writes more to the ring. Xen's own
     * xenstored has the same barrier here (although with no comment
     * at all, obviously, because it's Xen code).
     */
    smp_mb();

    qatomic_set(&s->xs->req_cons, cons);

    return copied;
}
1148 
/*
 * Push up to @len bytes of response data from @ptr into the shared ring.
 *
 * Returns the number of bytes actually copied, which may be less than
 * @len if the ring fills up first. Sets s->fatal_error and stops if the
 * guest-controlled indices are inconsistent.
 */
static unsigned int copy_to_ring(XenXenstoreState *s, uint8_t *ptr,
                                 unsigned int len)
{
    if (!len) {
        return 0;
    }

    XENSTORE_RING_IDX cons = qatomic_read(&s->xs->rsp_cons);
    XENSTORE_RING_IDX prod = qatomic_read(&s->xs->rsp_prod);
    unsigned int copied = 0;

    /*
     * This matches the barrier in copy_from_ring() (or the guest's
     * equivalent) between writing the data to the ring and updating
     * rsp_prod. It protects against the pathological case (which
     * again I think never happened except on Alpha) where our
     * subsequent writes to the ring could *cross* the read of
     * rsp_cons and the guest could see the new data when it was
     * intending to read the old.
     */
    smp_mb();

    while (len) {
        /* Free space is ring size minus what's still unconsumed. */
        unsigned int avail = cons + XENSTORE_RING_SIZE - prod;
        unsigned int offset = MASK_XENSTORE_IDX(prod);
        unsigned int copylen = len;

        /* The guest writes rsp_cons; more free than the ring is bogus. */
        if (avail > XENSTORE_RING_SIZE) {
            error_report("XenStore ring handling error");
            s->fatal_error = true;
            break;
        } else if (avail == 0) {
            break;
        }

        /* Clamp to the available space and to the ring wrap point. */
        if (copylen > avail) {
            copylen = avail;
        }
        if (copylen > XENSTORE_RING_SIZE - offset) {
            copylen = XENSTORE_RING_SIZE - offset;
        }


        memcpy(&s->xs->rsp[offset], ptr, copylen);
        copied += copylen;

        ptr += copylen;
        len -= copylen;

        prod += copylen;
    }

    /* Ensure the ring contents are seen before rsp_prod update. */
    smp_wmb();

    qatomic_set(&s->xs->rsp_prod, prod);

    return copied;
}
1208 
/*
 * Consume request bytes from the ring into s->req_data: first complete
 * the fixed-size header, then (once the header is in) the req->len
 * payload bytes. Returns how many bytes were consumed from the ring.
 */
static unsigned int get_req(XenXenstoreState *s)
{
    unsigned int copied = 0;

    if (s->fatal_error) {
        return 0;
    }

    assert(!req_pending(s));

    /*
     * Phase 1: fill in the header. This may read up to a full header's
     * worth beyond the current offset, which can pull in the first few
     * payload bytes too — harmless, since the copy is sequential and
     * they land exactly where phase 2 would put them.
     */
    if (s->req_offset < XENSTORE_HEADER_SIZE) {
        void *ptr = s->req_data + s->req_offset;
        unsigned int len = XENSTORE_HEADER_SIZE;
        unsigned int copylen = copy_from_ring(s, ptr, len);

        copied += copylen;
        s->req_offset += copylen;
    }

    /* Phase 2: with a complete header, the payload length is known. */
    if (s->req_offset >= XENSTORE_HEADER_SIZE) {
        struct xsd_sockmsg *req = (struct xsd_sockmsg *)s->req_data;

        /* Guest-supplied length must not exceed the payload buffer. */
        if (req->len > (uint32_t)XENSTORE_PAYLOAD_MAX) {
            error_report("Illegal XenStore request");
            s->fatal_error = true;
            return 0;
        }

        void *ptr = s->req_data + s->req_offset;
        unsigned int len = XENSTORE_HEADER_SIZE + req->len - s->req_offset;
        unsigned int copylen = copy_from_ring(s, ptr, len);

        copied += copylen;
        s->req_offset += copylen;
    }

    return copied;
}
1247 
/*
 * Push as much of the pending response as will fit into the ring.
 * Returns the number of bytes written; resets the response buffer once
 * the whole message (header + payload) has been delivered.
 */
static unsigned int put_rsp(XenXenstoreState *s)
{
    struct xsd_sockmsg *rsp;
    unsigned int full_len, sent;

    if (s->fatal_error) {
        return 0;
    }

    assert(s->rsp_pending);

    rsp = (struct xsd_sockmsg *)s->rsp_data;
    full_len = XENSTORE_HEADER_SIZE + rsp->len;
    assert(s->rsp_offset < full_len);

    sent = copy_to_ring(s, s->rsp_data + s->rsp_offset,
                        full_len - s->rsp_offset);
    s->rsp_offset += sent;

    /* Response fully delivered: make room for the next one. */
    if (s->rsp_offset == full_len) {
        reset_rsp(s);
    }

    return sent;
}
1272 
/*
 * Build an XS_WATCH_EVENT message (path|token|) directly into the
 * response buffer and mark it pending. Caller must ensure the buffer
 * is free. The event is silently dropped if the token won't fit.
 */
static void deliver_watch(XenXenstoreState *s, const char *path,
                          const char *token)
{
    struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
    uint8_t *rsp_data = (uint8_t *)&rsp[1];
    unsigned int len;

    assert(!s->rsp_pending);

    trace_xenstore_watch_event(path, token);

    rsp->type = XS_WATCH_EVENT;
    rsp->req_id = 0;
    rsp->tx_id = 0;
    rsp->len = 0;

    len = strlen(path);

    /* XENSTORE_ABS/REL_PATH_MAX should ensure there can be no overflow */
    assert(rsp->len + len < XENSTORE_PAYLOAD_MAX);

    memcpy(&rsp_data[rsp->len], path, len);
    rsp->len += len;
    rsp_data[rsp->len] = '\0';
    rsp->len++;

    len = strlen(token);
    /*
     * It is possible for the guest to have chosen a token that will
     * not fit (along with the path) into a watch event. We have no
     * choice but to drop the event if this is the case.
     */
    if (rsp->len + len >= XENSTORE_PAYLOAD_MAX) {
        return;
    }

    memcpy(&rsp_data[rsp->len], token, len);
    rsp->len += len;
    rsp_data[rsp->len] = '\0';
    rsp->len++;

    s->rsp_pending = true;
}
1316 
/* A queued watch notification awaiting delivery. */
struct watch_event {
    char *path;   /* node path that fired */
    char *token;  /* token the watcher registered (unused on backend side) */
};
1321 
free_watch_event(struct watch_event * ev)1322 static void free_watch_event(struct watch_event *ev)
1323 {
1324     if (ev) {
1325         g_free(ev->path);
1326         g_free(ev->token);
1327         g_free(ev);
1328     }
1329 }
1330 
/* Defer a watch event until the shared response buffer becomes free. */
static void queue_watch(XenXenstoreState *s, const char *path,
                        const char *token)
{
    struct watch_event *ev = g_new0(struct watch_event, 1);

    *ev = (struct watch_event) {
        .path = g_strdup(path),
        .token = g_strdup(token),
    };

    s->watch_events = g_list_append(s->watch_events, ev);
}
1341 
/*
 * Callback invoked by the xenstore implementation when a guest watch
 * fires: deliver the event immediately if the shared buffer is free,
 * otherwise queue it for later.
 */
static void fire_watch_cb(void *opaque, const char *path, const char *token)
{
    XenXenstoreState *s = opaque;

    assert(bql_locked());

    /*
     * If there's a response pending, we obviously can't scribble over
     * it. But if there's a request pending, it has dibs on the buffer
     * too.
     *
     * In the common case of a watch firing due to backend activity
     * when the ring was otherwise idle, we should be able to copy the
     * strings directly into the rsp_data and thence the actual ring,
     * without needing to perform any allocations and queue them.
     */
    if (s->rsp_pending || req_pending(s)) {
        queue_watch(s, path, token);
    } else {
        deliver_watch(s, path, token);
        /*
         * Attempt to queue the message into the actual ring, and send
         * the event channel notification if any bytes are copied.
         */
        if (s->rsp_pending && put_rsp(s) > 0) {
            xen_be_evtchn_notify(s->eh, s->be_port);
        }
    }
}
1371 
/* Deliver the oldest queued watch event into the response buffer. */
static void process_watch_events(XenXenstoreState *s)
{
    /* Caller guarantees the queue is non-empty. */
    GList *head = s->watch_events;
    struct watch_event *ev = head->data;

    deliver_watch(s, ev->path, ev->token);

    s->watch_events = g_list_delete_link(s->watch_events, head);
    free_watch_event(ev);
}
1381 
/*
 * Event-channel handler: pump data between the shared ring and the
 * request/response buffers until no further progress can be made, then
 * notify the guest if anything moved.
 */
static void xen_xenstore_event(void *opaque)
{
    XenXenstoreState *s = opaque;
    evtchn_port_t port = xen_be_evtchn_pending(s->eh);
    unsigned int copied_to, copied_from;
    bool processed, notify = false;

    if (port != s->be_port) {
        return;
    }

    /* We know this is a no-op. */
    xen_be_evtchn_unmask(s->eh, port);

    do {
        copied_to = copied_from = 0;
        processed = false;

        /* Queued watch events take the rsp buffer first, if it's free. */
        if (!s->rsp_pending && s->watch_events) {
            process_watch_events(s);
        }

        /* Flush any pending response out to the ring. */
        if (s->rsp_pending) {
            copied_to = put_rsp(s);
        }

        /* Pull in more request bytes unless a full request is waiting. */
        if (!req_pending(s)) {
            copied_from = get_req(s);
        }

        /* Handle a complete request once the rsp buffer is free again. */
        if (req_pending(s) && !s->rsp_pending && !s->watch_events) {
            process_req(s);
            processed = true;
        }

        notify |= copied_to || copied_from;
    } while (copied_to || copied_from || processed);

    if (notify) {
        xen_be_evtchn_notify(s->eh, s->be_port);
    }
}
1424 
/* Allocate an unbound event-channel port in the guest for xenstore. */
static void alloc_guest_port(XenXenstoreState *s)
{
    struct evtchn_alloc_unbound alloc = {
        .dom = DOMID_SELF,
        .remote_dom = DOMID_QEMU,
    };
    int err = xen_evtchn_alloc_unbound_op(&alloc);

    /* On failure, leave s->guest_port untouched. */
    if (err) {
        return;
    }

    s->guest_port = alloc.port;
}
1436 
/*
 * (Re-)initialise the emulated xenstore for the guest: map the shared
 * page, allocate and bind the event channel, publish the frontend
 * store/console nodes, and map the reserved grant reference.
 *
 * Returns 0 on success or a negative errno value.
 */
int xen_xenstore_reset(void)
{
    XenXenstoreState *s = xen_xenstore_singleton;
    int console_port;
    GList *perms;
    int err;

    if (!s) {
        return -ENOTSUP;
    }

    /* Discard any partially-transferred request/response state. */
    s->req_offset = s->rsp_offset = 0;
    s->rsp_pending = false;

    if (!memory_region_is_mapped(&s->xenstore_page)) {
        uint64_t gpa = XEN_SPECIAL_PFN(XENSTORE) << TARGET_PAGE_BITS;
        xen_overlay_do_map_page(&s->xenstore_page, gpa);
    }

    alloc_guest_port(s);

    /*
     * As qemu/dom0, bind to the guest's port. For incoming migration, this
     * will be unbound as the guest's evtchn table is overwritten. We then
     * rebind to the correct guest port in xen_xenstore_post_load().
     */
    err = xen_be_evtchn_bind_interdomain(s->eh, xen_domid, s->guest_port);
    if (err < 0) {
        return err;
    }
    s->be_port = err;

    /* Create frontend store nodes */
    perms = g_list_append(NULL, xs_perm_as_string(XS_PERM_NONE, DOMID_QEMU));
    perms = g_list_append(perms, xs_perm_as_string(XS_PERM_READ, xen_domid));

    relpath_printf(s, perms, "store/port", "%u", s->guest_port);
    relpath_printf(s, perms, "store/ring-ref", "%lu",
                   XEN_SPECIAL_PFN(XENSTORE));

    console_port = xen_primary_console_get_port();
    if (console_port) {
        relpath_printf(s, perms, "console/ring-ref", "%lu",
                       XEN_SPECIAL_PFN(CONSOLE));
        relpath_printf(s, perms, "console/port", "%u", console_port);
        relpath_printf(s, perms, "console/state", "%u", XenbusStateInitialised);
    }

    g_list_free_full(perms, g_free);

    /*
     * We don't actually access the guest's page through the grant, because
     * this isn't real Xen, and we can just use the page we gave it in the
     * first place. Map the grant anyway, mostly for cosmetic purposes so
     * it *looks* like it's in use in the guest-visible grant table.
     */
    s->gt = qemu_xen_gnttab_open();
    uint32_t xs_gntref = GNTTAB_RESERVED_XENSTORE;
    s->granted_xs = qemu_xen_gnttab_map_refs(s->gt, 1, xen_domid, &xs_gntref,
                                             PROT_READ | PROT_WRITE);

    return 0;
}
1500 
/* Backend-side (QEMU-internal) handle onto the emulated xenstore. */
struct qemu_xs_handle {
    XenstoreImplState *impl; /* shared implementation state */
    GList *watches;          /* list of struct qemu_xs_watch */
    QEMUBH *watch_bh;        /* bottom half that dispatches watch events */
};

/* One backend watch registration and its pending events. */
struct qemu_xs_watch {
    struct qemu_xs_handle *h;
    char *path;     /* watched path (owned) */
    xs_watch_fn fn; /* caller's callback */
    void *opaque;   /* caller's cookie, passed to fn */
    GList *events;  /* queued struct watch_event, drained by watch_bh */
};
1514 
/* Return the canonical xenstore path of a domain's tree; caller frees. */
static char *xs_be_get_domain_path(struct qemu_xs_handle *h, unsigned int domid)
{
    return g_strdup_printf("/local/domain/%u", domid);
}
1519 
/*
 * List the children of @path. Returns a NULL-terminated array of names
 * (ownership transferred to the caller) and stores the count in *num,
 * or returns NULL with errno set on failure.
 */
static char **xs_be_directory(struct qemu_xs_handle *h, xs_transaction_t t,
                              const char *path, unsigned int *num)
{
    GList *items = NULL, *l;
    unsigned int count, i = 0;
    char **ret;
    int err;

    err = xs_impl_directory(h->impl, DOMID_QEMU, t, path, NULL, &items);
    if (err) {
        errno = err;
        return NULL;
    }

    count = g_list_length(items);
    ret = g_new0(char *, count + 1);    /* NULL-terminated array */

    for (l = items; l; l = l->next) {
        ret[i++] = l->data;             /* take ownership of each string */
    }
    *num = count;

    g_list_free(items);
    return ret;
}
1543 
/*
 * Read a node's contents. Returns a buffer with a trailing NUL appended
 * (not counted in *len), or NULL with errno set on failure. The caller
 * frees the result with g_free().
 */
static void *xs_be_read(struct qemu_xs_handle *h, xs_transaction_t t,
                        const char *path, unsigned int *len)
{
    GByteArray *data = g_byte_array_new();
    bool free_segment = false;
    int err;

    err = xs_impl_read(h->impl, DOMID_QEMU, t, path, data);
    if (err) {
        /* Freeing the segment makes g_byte_array_free() return NULL. */
        free_segment = true;
        errno = err;
    } else {
        if (len) {
            *len = data->len;
        }
        /* The xen-bus-helper code expects to get NUL terminated string! */
        g_byte_array_append(data, (void *)"", 1);
    }

    return g_byte_array_free(data, free_segment);
}
1565 
/*
 * Write @len bytes of @data to @path. Returns true on success, false
 * with errno set on failure.
 */
static bool xs_be_write(struct qemu_xs_handle *h, xs_transaction_t t,
                        const char *path, const void *data, unsigned int len)
{
    GByteArray *payload = g_byte_array_new();
    int err;

    g_byte_array_append(payload, data, len);
    err = xs_impl_write(h->impl, DOMID_QEMU, t, path, payload);
    g_byte_array_unref(payload);

    if (!err) {
        return true;
    }

    errno = err;
    return false;
}
1581 
/*
 * Create @path if it does not already exist (mkdir semantics: an
 * existing node's contents are left alone) and set its permissions:
 * an XS_PERM_NONE entry for @owner followed by @perms for @domid.
 * Returns true on success, false with errno set on failure.
 */
static bool xs_be_create(struct qemu_xs_handle *h, xs_transaction_t t,
                         unsigned int owner, unsigned int domid,
                         unsigned int perms, const char *path)
{
    g_autoptr(GByteArray) data = g_byte_array_new();
    GList *perms_list = NULL;
    int err;

    /* mkdir does this */
    err = xs_impl_read(h->impl, DOMID_QEMU, t, path, data);
    if (err == ENOENT) {
        /* Node absent: create it empty. */
        err = xs_impl_write(h->impl, DOMID_QEMU, t, path, data);
    }
    if (err) {
        errno = err;
        return false;
    }

    perms_list = g_list_append(perms_list,
                               xs_perm_as_string(XS_PERM_NONE, owner));
    perms_list = g_list_append(perms_list,
                               xs_perm_as_string(perms, domid));

    err = xs_impl_set_perms(h->impl, DOMID_QEMU, t, path, perms_list);
    g_list_free_full(perms_list, g_free);
    if (err) {
        errno = err;
        return false;
    }
    return true;
}
1613 
/*
 * Remove @path (and its subtree). Returns true on success, false with
 * errno set on failure.
 */
static bool xs_be_destroy(struct qemu_xs_handle *h, xs_transaction_t t,
                          const char *path)
{
    int err = xs_impl_rm(h->impl, DOMID_QEMU, t, path);

    if (err) {
        errno = err;
    }
    return err == 0;
}
1624 
/*
 * Bottom half: drain each watch's queued events and invoke the caller's
 * callback for every fired path.
 */
static void be_watch_bh(void *_h)
{
    struct qemu_xs_handle *h = _h;
    GList *l;

    for (l = h->watches; l; l = l->next) {
        struct qemu_xs_watch *w = l->data;

        while (w->events) {
            struct watch_event *ev = w->events->data;

            /*
             * NOTE(review): w->fn() runs while we iterate these lists;
             * if a callback were to unwatch this watch, the iteration
             * would touch freed memory. Presumably callbacks don't do
             * that — worth confirming.
             */
            w->fn(w->opaque, ev->path);

            w->events = g_list_remove(w->events, ev);
            free_watch_event(ev);
        }
    }
}
1643 
xs_be_watch_cb(void * opaque,const char * path,const char * token)1644 static void xs_be_watch_cb(void *opaque, const char *path, const char *token)
1645 {
1646     struct watch_event *ev = g_new0(struct watch_event, 1);
1647     struct qemu_xs_watch *w = opaque;
1648 
1649     /* We don't care about the token */
1650     ev->path = g_strdup(path);
1651     w->events = g_list_append(w->events, ev);
1652 
1653     qemu_bh_schedule(w->h->watch_bh);
1654 }
1655 
/*
 * Register a backend watch on @path. Returns the new watch, or NULL
 * with errno set on failure.
 */
static struct qemu_xs_watch *xs_be_watch(struct qemu_xs_handle *h,
                                         const char *path, xs_watch_fn fn,
                                         void *opaque)
{
    struct qemu_xs_watch *w = g_new0(struct qemu_xs_watch, 1);
    int err;

    w->h = h;
    w->fn = fn;
    w->opaque = opaque;

    err = xs_impl_watch(h->impl, DOMID_QEMU, path, NULL, xs_be_watch_cb, w);
    if (err) {
        g_free(w);
        errno = err;
        return NULL;
    }

    /* Registered successfully: remember the path and track the watch. */
    w->path = g_strdup(path);
    h->watches = g_list_append(h->watches, w);
    return w;
}
1678 
xs_be_unwatch(struct qemu_xs_handle * h,struct qemu_xs_watch * w)1679 static void xs_be_unwatch(struct qemu_xs_handle *h, struct qemu_xs_watch *w)
1680 {
1681     xs_impl_unwatch(h->impl, DOMID_QEMU, w->path, NULL, xs_be_watch_cb, w);
1682 
1683     h->watches = g_list_remove(h->watches, w);
1684     g_list_free_full(w->events, (GDestroyNotify)free_watch_event);
1685     g_free(w->path);
1686     g_free(w);
1687 }
1688 
xs_be_transaction_start(struct qemu_xs_handle * h)1689 static xs_transaction_t xs_be_transaction_start(struct qemu_xs_handle *h)
1690 {
1691     unsigned int new_tx = XBT_NULL;
1692     int err = xs_impl_transaction_start(h->impl, DOMID_QEMU, &new_tx);
1693     if (err) {
1694         errno = err;
1695         return XBT_NULL;
1696     }
1697     return new_tx;
1698 }
1699 
/*
 * Commit (or, if @abort, discard) transaction @t. Returns true on
 * success, false with errno set on failure.
 */
static bool xs_be_transaction_end(struct qemu_xs_handle *h, xs_transaction_t t,
                                  bool abort)
{
    /* 'commit' is simply the inverse of 'abort'. */
    int err = xs_impl_transaction_end(h->impl, DOMID_QEMU, t, !abort);

    if (!err) {
        return true;
    }

    errno = err;
    return false;
}
1710 
xs_be_open(void)1711 static struct qemu_xs_handle *xs_be_open(void)
1712 {
1713     XenXenstoreState *s = xen_xenstore_singleton;
1714     struct qemu_xs_handle *h;
1715 
1716     if (!s || !s->impl) {
1717         errno = -ENOSYS;
1718         return NULL;
1719     }
1720 
1721     h = g_new0(struct qemu_xs_handle, 1);
1722     h->impl = s->impl;
1723 
1724     h->watch_bh = aio_bh_new(qemu_get_aio_context(), be_watch_bh, h);
1725 
1726     return h;
1727 }
1728 
xs_be_close(struct qemu_xs_handle * h)1729 static void xs_be_close(struct qemu_xs_handle *h)
1730 {
1731     while (h->watches) {
1732         struct qemu_xs_watch *w = h->watches->data;
1733         xs_be_unwatch(h, w);
1734     }
1735 
1736     qemu_bh_delete(h->watch_bh);
1737     g_free(h);
1738 }
1739 
/*
 * Backend ops vtable used when QEMU itself emulates xenstore (i.e. no
 * real xenstored is available), registered via xen_backend_ops.
 */
static struct xenstore_backend_ops emu_xenstore_backend_ops = {
    .open = xs_be_open,
    .close = xs_be_close,
    .get_domain_path = xs_be_get_domain_path,
    .directory = xs_be_directory,
    .read = xs_be_read,
    .write = xs_be_write,
    .create = xs_be_create,
    .destroy = xs_be_destroy,
    .watch = xs_be_watch,
    .unwatch = xs_be_unwatch,
    .transaction_start = xs_be_transaction_start,
    .transaction_end = xs_be_transaction_end,
};
1754