1 /*
2 * QEMU Xen emulation: Shared/overlay pages support
3 *
4 * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
5 *
6 * Authors: David Woodhouse <dwmw2@infradead.org>
7 *
8 * This work is licensed under the terms of the GNU GPL, version 2 or later.
9 * See the COPYING file in the top-level directory.
10 */
11
12 #include "qemu/osdep.h"
13
14 #include "qemu/host-utils.h"
15 #include "qemu/module.h"
16 #include "qemu/main-loop.h"
17 #include "qemu/cutils.h"
18 #include "qemu/error-report.h"
19 #include "qapi/error.h"
20 #include "qom/object.h"
21 #include "migration/vmstate.h"
22
23 #include "hw/sysbus.h"
24 #include "hw/xen/xen.h"
25 #include "hw/xen/xen_backend_ops.h"
26 #include "xen_overlay.h"
27 #include "xen_evtchn.h"
28 #include "xen_primary_console.h"
29 #include "xen_xenstore.h"
30
31 #include "system/kvm.h"
32 #include "system/kvm_xen.h"
33
34 #include "trace.h"
35
36 #include "xenstore_impl.h"
37
38 #include "hw/xen/interface/io/xs_wire.h"
39 #include "hw/xen/interface/event_channel.h"
40 #include "hw/xen/interface/grant_table.h"
41
42 #define TYPE_XEN_XENSTORE "xen-xenstore"
43 OBJECT_DECLARE_SIMPLE_TYPE(XenXenstoreState, XEN_XENSTORE)
44
45 #define ENTRIES_PER_FRAME_V1 (XEN_PAGE_SIZE / sizeof(grant_entry_v1_t))
46 #define ENTRIES_PER_FRAME_V2 (XEN_PAGE_SIZE / sizeof(grant_entry_v2_t))
47
48 #define XENSTORE_HEADER_SIZE ((unsigned int)sizeof(struct xsd_sockmsg))
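/*
 * Every message on the ring starts with a struct xsd_sockmsg header --
 * four uint32_t fields (type, req_id, tx_id, len), i.e. 16 bytes -- and
 * is followed by 'len' bytes of payload, up to XENSTORE_PAYLOAD_MAX.
 */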
49
50 struct XenXenstoreState {
51 /*< private >*/
52 SysBusDevice busdev;
53 /*< public >*/
54
55 XenstoreImplState *impl;
56 GList *watch_events; /* for the guest */
57
58 MemoryRegion xenstore_page;
59 struct xenstore_domain_interface *xs;
60 uint8_t req_data[XENSTORE_HEADER_SIZE + XENSTORE_PAYLOAD_MAX];
61 uint8_t rsp_data[XENSTORE_HEADER_SIZE + XENSTORE_PAYLOAD_MAX];
62 uint32_t req_offset;
63 uint32_t rsp_offset;
64 bool rsp_pending;
65 bool fatal_error;
66
67 evtchn_port_t guest_port;
68 evtchn_port_t be_port;
69 struct xenevtchn_handle *eh;
70
71 uint8_t *impl_state;
72 uint32_t impl_state_size;
73
74 struct xengntdev_handle *gt;
75 void *granted_xs;
76 };
77
78 struct XenXenstoreState *xen_xenstore_singleton;
79
80 static void xen_xenstore_event(void *opaque);
81 static void fire_watch_cb(void *opaque, const char *path, const char *token);
82
83 static struct xenstore_backend_ops emu_xenstore_backend_ops;
84
85 static void G_GNUC_PRINTF (4, 5) relpath_printf(XenXenstoreState *s,
86 GList *perms,
87 const char *relpath,
88 const char *fmt, ...)
89 {
90 gchar *abspath;
91 gchar *value;
92 va_list args;
93 GByteArray *data;
94 int err;
95
96 abspath = g_strdup_printf("/local/domain/%u/%s", xen_domid, relpath);
97 va_start(args, fmt);
98 value = g_strdup_vprintf(fmt, args);
99 va_end(args);
100
101 data = g_byte_array_new_take((void *)value, strlen(value));
102
103 err = xs_impl_write(s->impl, DOMID_QEMU, XBT_NULL, abspath, data);
104 assert(!err);
105
106 g_byte_array_unref(data);
107
108 err = xs_impl_set_perms(s->impl, DOMID_QEMU, XBT_NULL, abspath, perms);
109 assert(!err);
110
111 g_free(abspath);
112 }
113
114 static void xen_xenstore_realize(DeviceState *dev, Error **errp)
115 {
116 XenXenstoreState *s = XEN_XENSTORE(dev);
117 GList *perms;
118
119 if (xen_mode != XEN_EMULATE) {
120 error_setg(errp, "Xen xenstore support is for Xen emulation");
121 return;
122 }
123 memory_region_init_ram(&s->xenstore_page, OBJECT(dev), "xen:xenstore_page",
124 XEN_PAGE_SIZE, &error_abort);
125 memory_region_set_enabled(&s->xenstore_page, true);
126 s->xs = memory_region_get_ram_ptr(&s->xenstore_page);
127 memset(s->xs, 0, XEN_PAGE_SIZE);
128
129 /* We can't map it this early as KVM isn't ready */
130 xen_xenstore_singleton = s;
131
132 s->eh = xen_be_evtchn_open();
133 if (!s->eh) {
134 error_setg(errp, "Xenstore evtchn port init failed");
135 return;
136 }
137 aio_set_fd_handler(qemu_get_aio_context(), xen_be_evtchn_fd(s->eh),
138 xen_xenstore_event, NULL, NULL, NULL, s);
139
140 s->impl = xs_impl_create(xen_domid);
141
142 /* Populate the default nodes */
143
144 /* Nodes owned by 'dom0' but readable by the guest */
145 perms = g_list_append(NULL, xs_perm_as_string(XS_PERM_NONE, DOMID_QEMU));
146 perms = g_list_append(perms, xs_perm_as_string(XS_PERM_READ, xen_domid));
147
148 relpath_printf(s, perms, "", "%s", "");
149
150 relpath_printf(s, perms, "domid", "%u", xen_domid);
151
152 relpath_printf(s, perms, "control/platform-feature-xs_reset_watches", "%u", 1);
153 relpath_printf(s, perms, "control/platform-feature-multiprocessor-suspend", "%u", 1);
154
155 relpath_printf(s, perms, "platform/acpi", "%u", 1);
156 relpath_printf(s, perms, "platform/acpi_s3", "%u", 1);
157 relpath_printf(s, perms, "platform/acpi_s4", "%u", 1);
158 relpath_printf(s, perms, "platform/acpi_laptop_slate", "%u", 0);
159
160 g_list_free_full(perms, g_free);
161
162 /* Nodes owned by the guest */
163 perms = g_list_append(NULL, xs_perm_as_string(XS_PERM_NONE, xen_domid));
164
165 relpath_printf(s, perms, "attr", "%s", "");
166
167 relpath_printf(s, perms, "control/shutdown", "%s", "");
168 relpath_printf(s, perms, "control/feature-poweroff", "%u", 1);
169 relpath_printf(s, perms, "control/feature-reboot", "%u", 1);
170 relpath_printf(s, perms, "control/feature-suspend", "%u", 1);
171 relpath_printf(s, perms, "control/feature-s3", "%u", 1);
172 relpath_printf(s, perms, "control/feature-s4", "%u", 1);
173
174 relpath_printf(s, perms, "data", "%s", "");
175 relpath_printf(s, perms, "device", "%s", "");
176 relpath_printf(s, perms, "drivers", "%s", "");
177 relpath_printf(s, perms, "error", "%s", "");
178 relpath_printf(s, perms, "feature", "%s", "");
179
180 g_list_free_full(perms, g_free);
181
182 xen_xenstore_ops = &emu_xenstore_backend_ops;
183 }
184
185 static bool xen_xenstore_is_needed(void *opaque)
186 {
187 return xen_mode == XEN_EMULATE;
188 }
189
190 static int xen_xenstore_pre_save(void *opaque)
191 {
192 XenXenstoreState *s = opaque;
193 GByteArray *save;
194
195 if (s->eh) {
196 s->guest_port = xen_be_evtchn_get_guest_port(s->eh);
197 }
198
199 g_free(s->impl_state);
200 save = xs_impl_serialize(s->impl);
201 s->impl_state = save->data;
202 s->impl_state_size = save->len;
203 g_byte_array_free(save, false);
204
205 return 0;
206 }
207
208 static int xen_xenstore_post_load(void *opaque, int ver)
209 {
210 XenXenstoreState *s = opaque;
211 GByteArray *save;
212
213 /*
214 * As qemu/dom0, rebind to the guest's port. The Windows drivers may
215 * unbind the XenStore evtchn and rebind to it, having obtained the
216 * "remote" port through EVTCHNOP_status. In the case that migration
217 * occurs while it's unbound, the "remote" port needs to be the same
218 * as before so that the guest can find it, but should remain unbound.
219 */
220 if (s->guest_port) {
221 int be_port = xen_be_evtchn_bind_interdomain(s->eh, xen_domid,
222 s->guest_port);
223 if (be_port < 0) {
224 return be_port;
225 }
226 s->be_port = be_port;
227 }
228
229 save = g_byte_array_new_take(s->impl_state, s->impl_state_size);
230 s->impl_state = NULL;
231 s->impl_state_size = 0;
232
233 return xs_impl_deserialize(s->impl, save, xen_domid, fire_watch_cb, s);
234 }
235
236 static const VMStateDescription xen_xenstore_vmstate = {
237 .name = "xen_xenstore",
238 .unmigratable = 1, /* The PV back ends don't migrate yet */
239 .version_id = 1,
240 .minimum_version_id = 1,
241 .needed = xen_xenstore_is_needed,
242 .pre_save = xen_xenstore_pre_save,
243 .post_load = xen_xenstore_post_load,
244 .fields = (const VMStateField[]) {
245 VMSTATE_UINT8_ARRAY(req_data, XenXenstoreState,
246 sizeof_field(XenXenstoreState, req_data)),
247 VMSTATE_UINT8_ARRAY(rsp_data, XenXenstoreState,
248 sizeof_field(XenXenstoreState, rsp_data)),
249 VMSTATE_UINT32(req_offset, XenXenstoreState),
250 VMSTATE_UINT32(rsp_offset, XenXenstoreState),
251 VMSTATE_BOOL(rsp_pending, XenXenstoreState),
252 VMSTATE_UINT32(guest_port, XenXenstoreState),
253 VMSTATE_BOOL(fatal_error, XenXenstoreState),
254 VMSTATE_UINT32(impl_state_size, XenXenstoreState),
255 VMSTATE_VARRAY_UINT32_ALLOC(impl_state, XenXenstoreState,
256 impl_state_size, 0,
257 vmstate_info_uint8, uint8_t),
258 VMSTATE_END_OF_LIST()
259 }
260 };
261
262 static void xen_xenstore_class_init(ObjectClass *klass, const void *data)
263 {
264 DeviceClass *dc = DEVICE_CLASS(klass);
265
266 dc->realize = xen_xenstore_realize;
267 dc->vmsd = &xen_xenstore_vmstate;
268 }
269
270 static const TypeInfo xen_xenstore_info = {
271 .name = TYPE_XEN_XENSTORE,
272 .parent = TYPE_SYS_BUS_DEVICE,
273 .instance_size = sizeof(XenXenstoreState),
274 .class_init = xen_xenstore_class_init,
275 };
276
277 void xen_xenstore_create(void)
278 {
279 DeviceState *dev = sysbus_create_simple(TYPE_XEN_XENSTORE, -1, NULL);
280
281 xen_xenstore_singleton = XEN_XENSTORE(dev);
282
283 /*
284 * Defer the init (xen_xenstore_reset()) until KVM is set up and the
285 * overlay page can be mapped.
286 */
287 }
288
289 static void xen_xenstore_register_types(void)
290 {
291 type_register_static(&xen_xenstore_info);
292 }
293
294 type_init(xen_xenstore_register_types)
295
296 uint16_t xen_xenstore_get_port(void)
297 {
298 XenXenstoreState *s = xen_xenstore_singleton;
299 if (!s) {
300 return 0;
301 }
302 return s->guest_port;
303 }
304
305 static bool req_pending(XenXenstoreState *s)
306 {
307 struct xsd_sockmsg *req = (struct xsd_sockmsg *)s->req_data;
308
309 return s->req_offset == XENSTORE_HEADER_SIZE + req->len;
310 }
311
312 static void reset_req(XenXenstoreState *s)
313 {
314 memset(s->req_data, 0, sizeof(s->req_data));
315 s->req_offset = 0;
316 }
317
318 static void reset_rsp(XenXenstoreState *s)
319 {
320 s->rsp_pending = false;
321
322 memset(s->rsp_data, 0, sizeof(s->rsp_data));
323 s->rsp_offset = 0;
324 }
325
326 static void xs_error(XenXenstoreState *s, unsigned int id,
327 xs_transaction_t tx_id, int errnum)
328 {
329 struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
330 const char *errstr = NULL;
331
332 for (unsigned int i = 0; i < ARRAY_SIZE(xsd_errors); i++) {
333 const struct xsd_errors *xsd_error = &xsd_errors[i];
334
335 if (xsd_error->errnum == errnum) {
336 errstr = xsd_error->errstring;
337 break;
338 }
339 }
340 assert(errstr);
341
342 trace_xenstore_error(id, tx_id, errstr);
343
344 rsp->type = XS_ERROR;
345 rsp->req_id = id;
346 rsp->tx_id = tx_id;
347 rsp->len = (uint32_t)strlen(errstr) + 1;
348
349 memcpy(&rsp[1], errstr, rsp->len);
350 }
351
352 static void xs_ok(XenXenstoreState *s, unsigned int type, unsigned int req_id,
353 xs_transaction_t tx_id)
354 {
355 struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
356 const char *okstr = "OK";
357
358 rsp->type = type;
359 rsp->req_id = req_id;
360 rsp->tx_id = tx_id;
361 rsp->len = (uint32_t)strlen(okstr) + 1;
362
363 memcpy(&rsp[1], okstr, rsp->len);
364 }
365
366 /*
367 * The correct request and response formats are documented in xen.git:
368 * docs/misc/xenstore.txt. A summary is given below for convenience.
369 * The '|' symbol represents a NUL character.
370 *
371 * ---------- Database read, write and permissions operations ----------
372 *
373 * READ <path>| <value|>
374 * WRITE <path>|<value|>
375 * Store and read the octet string <value> at <path>.
376 * WRITE creates any missing parent paths, with empty values.
377 *
378 * MKDIR <path>|
379 * Ensures that the <path> exists, if necessary by creating
380 * it and any missing parents with empty values. If <path>
381 * or any parent already exists, its value is left unchanged.
382 *
383 * RM <path>|
384 * Ensures that the <path> does not exist, by deleting
385 * it and all of its children. It is not an error if <path> does
386 * not exist, but it _is_ an error if <path>'s immediate parent
387 * does not exist either.
388 *
389 * DIRECTORY <path>| <child-leaf-name>|*
390 * Gives a list of the immediate children of <path>, as only the
391 * leafnames. The resulting children are each named
392 * <path>/<child-leaf-name>.
393 *
394 * DIRECTORY_PART <path>|<offset> <gencnt>|<child-leaf-name>|*
395 * Same as DIRECTORY, but to be used for children lists longer than
396 * XENSTORE_PAYLOAD_MAX. Input are <path> and the byte offset into
397 * the list of children to return. Return values are the generation
398 * count <gencnt> of the node (to be used to ensure the node hasn't
399 * changed between two reads: <gencnt> being the same for multiple
400 * reads guarantees the node hasn't changed) and the list of children
401 * starting at the specified <offset> of the complete list.
402 *
403 * GET_PERMS <path>| <perm-as-string>|+
404 * SET_PERMS <path>|<perm-as-string>|+?
405 * <perm-as-string> is one of the following
406 * w<domid> write only
407 * r<domid> read only
408 * b<domid> both read and write
409 * n<domid> no access
410 * See https://wiki.xen.org/wiki/XenBus section
411 * `Permissions' for details of the permissions system.
412 * It is possible to set permissions for the special watch paths
413 * "@introduceDomain" and "@releaseDomain" to enable receiving those
414 * watches in unprivileged domains.
415 *
416 * ---------- Watches ----------
417 *
418 * WATCH <wpath>|<token>|?
419 * Adds a watch.
420 *
421 * When a <path> is modified (including path creation, removal,
422 * contents change or permissions change) this generates an event
423 * on the changed <path>. Changes made in transactions cause an
424 * event only if and when committed. Each occurring event is
425 * matched against all the watches currently set up, and each
426 * matching watch results in a WATCH_EVENT message (see below).
427 *
428 * The event's path matches the watch's <wpath> if it is a child
429 * of <wpath>.
430 *
431 * <wpath> can be a <path> to watch or @<wspecial>. In the
432 * latter case <wspecial> may have any syntax but it matches
433 * (according to the rules above) only the following special
434 * events which are invented by xenstored:
435 * @introduceDomain occurs on INTRODUCE
436 * @releaseDomain occurs on any domain crash or
437 * shutdown, and also on RELEASE
438 * and domain destruction
439 * <wspecial> events are sent to privileged callers or explicitly
440 * via SET_PERMS enabled domains only.
441 *
442 * When a watch is first set up it is triggered once straight
443 * away, with <path> equal to <wpath>. Watches may be triggered
444 * spuriously. The tx_id in a WATCH request is ignored.
445 *
446 * Watches are supposed to be restricted by the permissions
447 * system but in practice the implementation is imperfect.
448 * Applications should not rely on being sent a notification for
449 * paths that they cannot read; however, an application may rely
450 * on being sent a watch when a path which it _is_ able to read
451 * is deleted even if that leaves only a nonexistent unreadable
452 * parent. A notification may be omitted if a node's permissions
453 * are changed so as to make it unreadable, in which case future
454 * notifications may be suppressed (and if the node is later made
455 * readable, some notifications may have been lost).
456 *
457 * WATCH_EVENT <epath>|<token>|
458 * Unsolicited `reply' generated for matching modification events
459 * as described above. req_id and tx_id are both 0.
460 *
461 * <epath> is the event's path, ie the actual path that was
462 * modified; however if the event was the recursive removal of a
463 * parent of <wpath>, <epath> is just
464 * <wpath> (rather than the actual path which was removed). So
465 * <epath> is a child of <wpath>, regardless.
466 *
467 * Iff <wpath> for the watch was specified as a relative pathname,
468 * the <epath> path will also be relative (with the same base,
469 * obviously).
470 *
471 * UNWATCH <wpath>|<token>|?
472 *
473 * RESET_WATCHES |
474 * Reset all watches and transactions of the caller.
475 *
476 * ---------- Transactions ----------
477 *
478 * TRANSACTION_START | <transid>|
479 * <transid> is an opaque uint32_t allocated by xenstored
480 * represented as unsigned decimal. After this, the transaction may
481 * be referenced by using <transid> (as 32-bit binary) in the
482 * tx_id request header field. When a transaction is started the whole
483 * db is copied; reads and writes happen on the copy.
484 * It is not legal to send non-0 tx_id in TRANSACTION_START.
485 *
486 * TRANSACTION_END T|
487 * TRANSACTION_END F|
488 * tx_id must refer to an existing transaction. After this
489 * request the tx_id is no longer valid and may be reused by
490 * xenstore. If F, the transaction is discarded. If T,
491 * it is committed: if there were any other intervening writes
492 * then our END gets EAGAIN.
493 *
494 * The plan is that in the future only intervening `conflicting'
495 * writes cause EAGAIN, meaning only writes or other commits
496 * which changed paths which were read or written in the
497 * transaction at hand.
498 *
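 * As a purely illustrative example (req_id is chosen by the client and
 * echoed back; the values here are arbitrary), reading the "domid" node
 * of a guest running as domain 1, outside any transaction, would be:
 *
 *  request:  header { type = XS_READ, req_id = 7, tx_id = 0, len = 6 }
 *            payload "domid\0"
 *  reply:    header { type = XS_READ, req_id = 7, tx_id = 0, len = 1 }
 *            payload "1"
 *
 * As implemented by xs_read() below, the reply payload is the raw node
 * value; its size is carried in the header's len field.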
499 */
500
501 static void xs_read(XenXenstoreState *s, unsigned int req_id,
502 xs_transaction_t tx_id, uint8_t *req_data, unsigned int len)
503 {
504 const char *path = (const char *)req_data;
505 struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
506 uint8_t *rsp_data = (uint8_t *)&rsp[1];
507 g_autoptr(GByteArray) data = g_byte_array_new();
508 int err;
509
510 if (len == 0 || req_data[len - 1] != '\0') {
511 xs_error(s, req_id, tx_id, EINVAL);
512 return;
513 }
514
515 trace_xenstore_read(tx_id, path);
516 err = xs_impl_read(s->impl, xen_domid, tx_id, path, data);
517 if (err) {
518 xs_error(s, req_id, tx_id, err);
519 return;
520 }
521
522 rsp->type = XS_READ;
523 rsp->req_id = req_id;
524 rsp->tx_id = tx_id;
525 rsp->len = 0;
526
527 len = data->len;
528 if (len > XENSTORE_PAYLOAD_MAX) {
529 xs_error(s, req_id, tx_id, E2BIG);
530 return;
531 }
532
533 if (!len) {
534 return;
535 }
536
537 memcpy(&rsp_data[rsp->len], data->data, len);
538 rsp->len += len;
539 }
540
541 static void xs_write(XenXenstoreState *s, unsigned int req_id,
542 xs_transaction_t tx_id, uint8_t *req_data,
543 unsigned int len)
544 {
545 g_autoptr(GByteArray) data = g_byte_array_new();
546 const char *path;
547 int err;
548
549 if (len == 0) {
550 xs_error(s, req_id, tx_id, EINVAL);
551 return;
552 }
553
554 path = (const char *)req_data;
555
556 while (len--) {
557 if (*req_data++ == '\0') {
558 break;
559 }
560 if (len == 0) {
561 xs_error(s, req_id, tx_id, EINVAL);
562 return;
563 }
564 }
565
566 g_byte_array_append(data, req_data, len);
567
568 trace_xenstore_write(tx_id, path);
569 err = xs_impl_write(s->impl, xen_domid, tx_id, path, data);
570 if (err) {
571 xs_error(s, req_id, tx_id, err);
572 return;
573 }
574
575 xs_ok(s, XS_WRITE, req_id, tx_id);
576 }
577
578 static void xs_mkdir(XenXenstoreState *s, unsigned int req_id,
579 xs_transaction_t tx_id, uint8_t *req_data,
580 unsigned int len)
581 {
582 g_autoptr(GByteArray) data = g_byte_array_new();
583 const char *path;
584 int err;
585
586 if (len == 0 || req_data[len - 1] != '\0') {
587 xs_error(s, req_id, tx_id, EINVAL);
588 return;
589 }
590
591 path = (const char *)req_data;
592
593 trace_xenstore_mkdir(tx_id, path);
594 err = xs_impl_read(s->impl, xen_domid, tx_id, path, data);
595 if (err == ENOENT) {
596 err = xs_impl_write(s->impl, xen_domid, tx_id, path, data);
597 }
598
599 if (err) {
600 xs_error(s, req_id, tx_id, err);
601 return;
602 }
603
604 xs_ok(s, XS_MKDIR, req_id, tx_id);
605 }
606
607 static void xs_append_strings(XenXenstoreState *s, struct xsd_sockmsg *rsp,
608 GList *strings, unsigned int start, bool truncate)
609 {
610 uint8_t *rsp_data = (uint8_t *)&rsp[1];
611 GList *l;
612
613 for (l = strings; l; l = l->next) {
614 size_t len = strlen(l->data) + 1; /* Including the NUL termination */
615 char *str = l->data;
616
617 if (rsp->len + len > XENSTORE_PAYLOAD_MAX) {
618 if (truncate) {
619 len = XENSTORE_PAYLOAD_MAX - rsp->len;
620 if (!len) {
621 return;
622 }
623 } else {
624 xs_error(s, rsp->req_id, rsp->tx_id, E2BIG);
625 return;
626 }
627 }
628
629 if (start) {
630 if (start >= len) {
631 start -= len;
632 continue;
633 }
634
635 str += start;
636 len -= start;
637 start = 0;
638 }
639
640 memcpy(&rsp_data[rsp->len], str, len);
641 rsp->len += len;
642 }
643 /* XS_DIRECTORY_PART wants an extra NUL to indicate the end */
644 if (truncate && rsp->len < XENSTORE_PAYLOAD_MAX) {
645 rsp_data[rsp->len++] = '\0';
646 }
647 }
648
649 static void xs_directory(XenXenstoreState *s, unsigned int req_id,
650 xs_transaction_t tx_id, uint8_t *req_data,
651 unsigned int len)
652 {
653 struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
654 GList *items = NULL;
655 const char *path;
656 int err;
657
658 if (len == 0 || req_data[len - 1] != '\0') {
659 xs_error(s, req_id, tx_id, EINVAL);
660 return;
661 }
662
663 path = (const char *)req_data;
664
665 trace_xenstore_directory(tx_id, path);
666 err = xs_impl_directory(s->impl, xen_domid, tx_id, path, NULL, &items);
667 if (err != 0) {
668 xs_error(s, req_id, tx_id, err);
669 return;
670 }
671
672 rsp->type = XS_DIRECTORY;
673 rsp->req_id = req_id;
674 rsp->tx_id = tx_id;
675 rsp->len = 0;
676
677 xs_append_strings(s, rsp, items, 0, false);
678
679 g_list_free_full(items, g_free);
680 }
681
682 static void xs_directory_part(XenXenstoreState *s, unsigned int req_id,
683 xs_transaction_t tx_id, uint8_t *req_data,
684 unsigned int len)
685 {
686 const char *offset_str, *path = (const char *)req_data;
687 struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
688 char *rsp_data = (char *)&rsp[1];
689 uint64_t gencnt = 0;
690 unsigned int offset;
691 GList *items = NULL;
692 int err;
693
694 if (len == 0) {
695 xs_error(s, req_id, tx_id, EINVAL);
696 return;
697 }
698
699 while (len--) {
700 if (*req_data++ == '\0') {
701 break;
702 }
703 if (len == 0) {
704 xs_error(s, req_id, tx_id, EINVAL);
705 return;
706 }
707 }
708
709 offset_str = (const char *)req_data;
710 while (len--) {
711 if (*req_data++ == '\0') {
712 break;
713 }
714 if (len == 0) {
715 xs_error(s, req_id, tx_id, EINVAL);
716 return;
717 }
718 }
719
720 if (len) {
721 xs_error(s, req_id, tx_id, EINVAL);
722 return;
723 }
724
725 if (qemu_strtoui(offset_str, NULL, 10, &offset) < 0) {
726 xs_error(s, req_id, tx_id, EINVAL);
727 return;
728 }
729
730 trace_xenstore_directory_part(tx_id, path, offset);
731 err = xs_impl_directory(s->impl, xen_domid, tx_id, path, &gencnt, &items);
732 if (err != 0) {
733 xs_error(s, req_id, tx_id, err);
734 return;
735 }
736
737 rsp->type = XS_DIRECTORY_PART;
738 rsp->req_id = req_id;
739 rsp->tx_id = tx_id;
740 rsp->len = snprintf(rsp_data, XENSTORE_PAYLOAD_MAX, "%" PRIu64, gencnt) + 1;
741
742 xs_append_strings(s, rsp, items, offset, true);
743
744 g_list_free_full(items, g_free);
745 }
746
747 static void xs_transaction_start(XenXenstoreState *s, unsigned int req_id,
748 xs_transaction_t tx_id, uint8_t *req_data,
749 unsigned int len)
750 {
751 struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
752 char *rsp_data = (char *)&rsp[1];
753 int err;
754
755 if (len != 1 || req_data[0] != '\0') {
756 xs_error(s, req_id, tx_id, EINVAL);
757 return;
758 }
759
760 rsp->type = XS_TRANSACTION_START;
761 rsp->req_id = req_id;
762 rsp->tx_id = tx_id;
763 rsp->len = 0;
764
765 err = xs_impl_transaction_start(s->impl, xen_domid, &tx_id);
766 if (err) {
767 xs_error(s, req_id, tx_id, err);
768 return;
769 }
770
771 trace_xenstore_transaction_start(tx_id);
772
773 rsp->len = snprintf(rsp_data, XENSTORE_PAYLOAD_MAX, "%u", tx_id);
774 assert(rsp->len < XENSTORE_PAYLOAD_MAX);
775 rsp->len++;
776 }
777
778 static void xs_transaction_end(XenXenstoreState *s, unsigned int req_id,
779 xs_transaction_t tx_id, uint8_t *req_data,
780 unsigned int len)
781 {
782 bool commit;
783 int err;
784
785 if (len != 2 || req_data[1] != '\0') {
786 xs_error(s, req_id, tx_id, EINVAL);
787 return;
788 }
789
790 switch (req_data[0]) {
791 case 'T':
792 commit = true;
793 break;
794 case 'F':
795 commit = false;
796 break;
797 default:
798 xs_error(s, req_id, tx_id, EINVAL);
799 return;
800 }
801
802 trace_xenstore_transaction_end(tx_id, commit);
803 err = xs_impl_transaction_end(s->impl, xen_domid, tx_id, commit);
804 if (err) {
805 xs_error(s, req_id, tx_id, err);
806 return;
807 }
808
809 xs_ok(s, XS_TRANSACTION_END, req_id, tx_id);
810 }
811
812 static void xs_rm(XenXenstoreState *s, unsigned int req_id,
813 xs_transaction_t tx_id, uint8_t *req_data, unsigned int len)
814 {
815 const char *path = (const char *)req_data;
816 int err;
817
818 if (len == 0 || req_data[len - 1] != '\0') {
819 xs_error(s, req_id, tx_id, EINVAL);
820 return;
821 }
822
823 trace_xenstore_rm(tx_id, path);
824 err = xs_impl_rm(s->impl, xen_domid, tx_id, path);
825 if (err) {
826 xs_error(s, req_id, tx_id, err);
827 return;
828 }
829
830 xs_ok(s, XS_RM, req_id, tx_id);
831 }
832
833 static void xs_get_perms(XenXenstoreState *s, unsigned int req_id,
834 xs_transaction_t tx_id, uint8_t *req_data,
835 unsigned int len)
836 {
837 const char *path = (const char *)req_data;
838 struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
839 GList *perms = NULL;
840 int err;
841
842 if (len == 0 || req_data[len - 1] != '\0') {
843 xs_error(s, req_id, tx_id, EINVAL);
844 return;
845 }
846
847 trace_xenstore_get_perms(tx_id, path);
848 err = xs_impl_get_perms(s->impl, xen_domid, tx_id, path, &perms);
849 if (err) {
850 xs_error(s, req_id, tx_id, err);
851 return;
852 }
853
854 rsp->type = XS_GET_PERMS;
855 rsp->req_id = req_id;
856 rsp->tx_id = tx_id;
857 rsp->len = 0;
858
859 xs_append_strings(s, rsp, perms, 0, false);
860
861 g_list_free_full(perms, g_free);
862 }
863
864 static void xs_set_perms(XenXenstoreState *s, unsigned int req_id,
865 xs_transaction_t tx_id, uint8_t *req_data,
866 unsigned int len)
867 {
868 const char *path = (const char *)req_data;
869 uint8_t *perm;
870 GList *perms = NULL;
871 int err;
872
873 if (len == 0) {
874 xs_error(s, req_id, tx_id, EINVAL);
875 return;
876 }
877
878 while (len--) {
879 if (*req_data++ == '\0') {
880 break;
881 }
882 if (len == 0) {
883 xs_error(s, req_id, tx_id, EINVAL);
884 return;
885 }
886 }
887
888 perm = req_data;
889 while (len--) {
890 if (*req_data++ == '\0') {
891 perms = g_list_append(perms, perm);
892 perm = req_data;
893 }
894 }
895
896 /*
897 * Note that there may be trailing garbage at the end of the buffer.
898 * This is explicitly permitted by the '?' at the end of the definition:
899 *
900 * SET_PERMS <path>|<perm-as-string>|+?
901 */
902
903 trace_xenstore_set_perms(tx_id, path);
904 err = xs_impl_set_perms(s->impl, xen_domid, tx_id, path, perms);
905 g_list_free(perms);
906 if (err) {
907 xs_error(s, req_id, tx_id, err);
908 return;
909 }
910
911 xs_ok(s, XS_SET_PERMS, req_id, tx_id);
912 }
913
914 static void xs_watch(XenXenstoreState *s, unsigned int req_id,
915 xs_transaction_t tx_id, uint8_t *req_data,
916 unsigned int len)
917 {
918 const char *token, *path = (const char *)req_data;
919 int err;
920
921 if (len == 0) {
922 xs_error(s, req_id, tx_id, EINVAL);
923 return;
924 }
925
926 while (len--) {
927 if (*req_data++ == '\0') {
928 break;
929 }
930 if (len == 0) {
931 xs_error(s, req_id, tx_id, EINVAL);
932 return;
933 }
934 }
935
936 token = (const char *)req_data;
937 while (len--) {
938 if (*req_data++ == '\0') {
939 break;
940 }
941 if (len == 0) {
942 xs_error(s, req_id, tx_id, EINVAL);
943 return;
944 }
945 }
946
947 /*
948 * Note that there may be trailing garbage at the end of the buffer.
949 * This is explicitly permitted by the '?' at the end of the definition:
950 *
951 * WATCH <wpath>|<token>|?
952 */
953
954 trace_xenstore_watch(path, token);
955 err = xs_impl_watch(s->impl, xen_domid, path, token, fire_watch_cb, s);
956 if (err) {
957 xs_error(s, req_id, tx_id, err);
958 return;
959 }
960
961 xs_ok(s, XS_WATCH, req_id, tx_id);
962 }
963
964 static void xs_unwatch(XenXenstoreState *s, unsigned int req_id,
965 xs_transaction_t tx_id, uint8_t *req_data,
966 unsigned int len)
967 {
968 const char *token, *path = (const char *)req_data;
969 int err;
970
971 if (len == 0) {
972 xs_error(s, req_id, tx_id, EINVAL);
973 return;
974 }
975
976 while (len--) {
977 if (*req_data++ == '\0') {
978 break;
979 }
980 if (len == 0) {
981 xs_error(s, req_id, tx_id, EINVAL);
982 return;
983 }
984 }
985
986 token = (const char *)req_data;
987 while (len--) {
988 if (*req_data++ == '\0') {
989 break;
990 }
991 if (len == 0) {
992 xs_error(s, req_id, tx_id, EINVAL);
993 return;
994 }
995 }
996
997 trace_xenstore_unwatch(path, token);
998 err = xs_impl_unwatch(s->impl, xen_domid, path, token, fire_watch_cb, s);
999 if (err) {
1000 xs_error(s, req_id, tx_id, err);
1001 return;
1002 }
1003
1004 xs_ok(s, XS_UNWATCH, req_id, tx_id);
1005 }
1006
1007 static void xs_reset_watches(XenXenstoreState *s, unsigned int req_id,
1008 xs_transaction_t tx_id, uint8_t *req_data,
1009 unsigned int len)
1010 {
1011 if (len == 0 || req_data[len - 1] != '\0') {
1012 xs_error(s, req_id, tx_id, EINVAL);
1013 return;
1014 }
1015
1016 trace_xenstore_reset_watches();
1017 xs_impl_reset_watches(s->impl, xen_domid);
1018
1019 xs_ok(s, XS_RESET_WATCHES, req_id, tx_id);
1020 }
1021
1022 static void xs_priv(XenXenstoreState *s, unsigned int req_id,
1023 xs_transaction_t tx_id, uint8_t *data,
1024 unsigned int len)
1025 {
1026 xs_error(s, req_id, tx_id, EACCES);
1027 }
1028
1029 static void xs_unimpl(XenXenstoreState *s, unsigned int req_id,
1030 xs_transaction_t tx_id, uint8_t *data,
1031 unsigned int len)
1032 {
1033 xs_error(s, req_id, tx_id, ENOSYS);
1034 }
1035
1036 typedef void (*xs_impl)(XenXenstoreState *s, unsigned int req_id,
1037 xs_transaction_t tx_id, uint8_t *data,
1038 unsigned int len);
1039
1040 struct xsd_req {
1041 const char *name;
1042 xs_impl fn;
1043 };
1044 #define XSD_REQ(_type, _fn) \
1045 [_type] = { .name = #_type, .fn = _fn }
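/*
 * For example, XSD_REQ(XS_READ, xs_read) expands to the designated
 * initialiser [XS_READ] = { .name = "XS_READ", .fn = xs_read }, so
 * xsd_reqs[] below is indexed directly by the wire message type.
 */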
1046
1047 struct xsd_req xsd_reqs[] = {
1048 XSD_REQ(XS_READ, xs_read),
1049 XSD_REQ(XS_WRITE, xs_write),
1050 XSD_REQ(XS_MKDIR, xs_mkdir),
1051 XSD_REQ(XS_DIRECTORY, xs_directory),
1052 XSD_REQ(XS_DIRECTORY_PART, xs_directory_part),
1053 XSD_REQ(XS_TRANSACTION_START, xs_transaction_start),
1054 XSD_REQ(XS_TRANSACTION_END, xs_transaction_end),
1055 XSD_REQ(XS_RM, xs_rm),
1056 XSD_REQ(XS_GET_PERMS, xs_get_perms),
1057 XSD_REQ(XS_SET_PERMS, xs_set_perms),
1058 XSD_REQ(XS_WATCH, xs_watch),
1059 XSD_REQ(XS_UNWATCH, xs_unwatch),
1060 XSD_REQ(XS_CONTROL, xs_priv),
1061 XSD_REQ(XS_INTRODUCE, xs_priv),
1062 XSD_REQ(XS_RELEASE, xs_priv),
1063 XSD_REQ(XS_IS_DOMAIN_INTRODUCED, xs_priv),
1064 XSD_REQ(XS_RESUME, xs_priv),
1065 XSD_REQ(XS_SET_TARGET, xs_priv),
1066 XSD_REQ(XS_RESET_WATCHES, xs_reset_watches),
1067 };
1068
1069 static void process_req(XenXenstoreState *s)
1070 {
1071 struct xsd_sockmsg *req = (struct xsd_sockmsg *)s->req_data;
1072 xs_impl handler = NULL;
1073
1074 assert(req_pending(s));
1075 assert(!s->rsp_pending);
1076
1077 if (req->type < ARRAY_SIZE(xsd_reqs)) {
1078 handler = xsd_reqs[req->type].fn;
1079 }
1080 if (!handler) {
1081 handler = &xs_unimpl;
1082 }
1083
1084 handler(s, req->req_id, req->tx_id, (uint8_t *)&req[1], req->len);
1085
1086 s->rsp_pending = true;
1087 reset_req(s);
1088 }
1089
1090 static unsigned int copy_from_ring(XenXenstoreState *s, uint8_t *ptr,
1091 unsigned int len)
1092 {
1093 if (!len) {
1094 return 0;
1095 }
1096
1097 XENSTORE_RING_IDX prod = qatomic_read(&s->xs->req_prod);
1098 XENSTORE_RING_IDX cons = qatomic_read(&s->xs->req_cons);
1099 unsigned int copied = 0;
1100
1101 /* Ensure the ring contents don't cross the req_prod access. */
1102 smp_rmb();
1103
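    /*
     * prod and cons are free-running XENSTORE_RING_IDX counters; their
     * difference is the number of unconsumed bytes, and MASK_XENSTORE_IDX()
     * turns an index into an offset within the XENSTORE_RING_SIZE-byte
     * request ring.
     */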
1104 while (len) {
1105 unsigned int avail = prod - cons;
1106 unsigned int offset = MASK_XENSTORE_IDX(cons);
1107 unsigned int copylen = avail;
1108
1109 if (avail > XENSTORE_RING_SIZE) {
1110 error_report("XenStore ring handling error");
1111 s->fatal_error = true;
1112 break;
1113 } else if (avail == 0) {
1114 break;
1115 }
1116
1117 if (copylen > len) {
1118 copylen = len;
1119 }
1120 if (copylen > XENSTORE_RING_SIZE - offset) {
1121 copylen = XENSTORE_RING_SIZE - offset;
1122 }
1123
1124 memcpy(ptr, &s->xs->req[offset], copylen);
1125 copied += copylen;
1126
1127 ptr += copylen;
1128 len -= copylen;
1129
1130 cons += copylen;
1131 }
1132
1133 /*
1134 * Not sure this ever mattered except on Alpha, but this barrier
1135 * is to ensure that the update to req_cons is globally visible
1136 * only after we have consumed all the data from the ring, and we
1137 * don't end up seeing data written to the ring *after* the other
1138 * end sees the update and writes more to the ring. Xen's own
1139 * xenstored has the same barrier here (although with no comment
1140 * at all, obviously, because it's Xen code).
1141 */
1142 smp_mb();
1143
1144 qatomic_set(&s->xs->req_cons, cons);
1145
1146 return copied;
1147 }
1148
1149 static unsigned int copy_to_ring(XenXenstoreState *s, uint8_t *ptr,
1150 unsigned int len)
1151 {
1152 if (!len) {
1153 return 0;
1154 }
1155
1156 XENSTORE_RING_IDX cons = qatomic_read(&s->xs->rsp_cons);
1157 XENSTORE_RING_IDX prod = qatomic_read(&s->xs->rsp_prod);
1158 unsigned int copied = 0;
1159
1160 /*
1161 * This matches the barrier in copy_to_ring() (or the guest's
1162 * equivalent) between writing the data to the ring and updating
1163 * rsp_prod. It protects against the pathological case (which
1164 * again I think never happened except on Alpha) where our
1165 * subsequent writes to the ring could *cross* the read of
1166 * rsp_cons and the guest could see the new data when it was
1167 * intending to read the old.
1168 */
1169 smp_mb();
1170
1171 while (len) {
1172 unsigned int avail = cons + XENSTORE_RING_SIZE - prod;
1173 unsigned int offset = MASK_XENSTORE_IDX(prod);
1174 unsigned int copylen = len;
1175
1176 if (avail > XENSTORE_RING_SIZE) {
1177 error_report("XenStore ring handling error");
1178 s->fatal_error = true;
1179 break;
1180 } else if (avail == 0) {
1181 break;
1182 }
1183
1184 if (copylen > avail) {
1185 copylen = avail;
1186 }
1187 if (copylen > XENSTORE_RING_SIZE - offset) {
1188 copylen = XENSTORE_RING_SIZE - offset;
1189 }
1190
1191
1192 memcpy(&s->xs->rsp[offset], ptr, copylen);
1193 copied += copylen;
1194
1195 ptr += copylen;
1196 len -= copylen;
1197
1198 prod += copylen;
1199 }
1200
1201 /* Ensure the ring contents are seen before rsp_prod update. */
1202 smp_wmb();
1203
1204 qatomic_set(&s->xs->rsp_prod, prod);
1205
1206 return copied;
1207 }
1208
1209 static unsigned int get_req(XenXenstoreState *s)
1210 {
1211 unsigned int copied = 0;
1212
1213 if (s->fatal_error) {
1214 return 0;
1215 }
1216
1217 assert(!req_pending(s));
1218
1219 if (s->req_offset < XENSTORE_HEADER_SIZE) {
1220 void *ptr = s->req_data + s->req_offset;
1221 unsigned int len = XENSTORE_HEADER_SIZE;
1222 unsigned int copylen = copy_from_ring(s, ptr, len);
1223
1224 copied += copylen;
1225 s->req_offset += copylen;
1226 }
1227
1228 if (s->req_offset >= XENSTORE_HEADER_SIZE) {
1229 struct xsd_sockmsg *req = (struct xsd_sockmsg *)s->req_data;
1230
1231 if (req->len > (uint32_t)XENSTORE_PAYLOAD_MAX) {
1232 error_report("Illegal XenStore request");
1233 s->fatal_error = true;
1234 return 0;
1235 }
1236
1237 void *ptr = s->req_data + s->req_offset;
1238 unsigned int len = XENSTORE_HEADER_SIZE + req->len - s->req_offset;
1239 unsigned int copylen = copy_from_ring(s, ptr, len);
1240
1241 copied += copylen;
1242 s->req_offset += copylen;
1243 }
1244
1245 return copied;
1246 }
1247
1248 static unsigned int put_rsp(XenXenstoreState *s)
1249 {
1250 if (s->fatal_error) {
1251 return 0;
1252 }
1253
1254 assert(s->rsp_pending);
1255
1256 struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
1257 assert(s->rsp_offset < XENSTORE_HEADER_SIZE + rsp->len);
1258
1259 void *ptr = s->rsp_data + s->rsp_offset;
1260 unsigned int len = XENSTORE_HEADER_SIZE + rsp->len - s->rsp_offset;
1261 unsigned int copylen = copy_to_ring(s, ptr, len);
1262
1263 s->rsp_offset += copylen;
1264
1265 /* Have we produced a complete response? */
1266 if (s->rsp_offset == XENSTORE_HEADER_SIZE + rsp->len) {
1267 reset_rsp(s);
1268 }
1269
1270 return copylen;
1271 }
1272
1273 static void deliver_watch(XenXenstoreState *s, const char *path,
1274 const char *token)
1275 {
1276 struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
1277 uint8_t *rsp_data = (uint8_t *)&rsp[1];
1278 unsigned int len;
1279
1280 assert(!s->rsp_pending);
1281
1282 trace_xenstore_watch_event(path, token);
1283
1284 rsp->type = XS_WATCH_EVENT;
1285 rsp->req_id = 0;
1286 rsp->tx_id = 0;
1287 rsp->len = 0;
1288
1289 len = strlen(path);
1290
1291 /* XENSTORE_ABS/REL_PATH_MAX should ensure there can be no overflow */
1292 assert(rsp->len + len < XENSTORE_PAYLOAD_MAX);
1293
1294 memcpy(&rsp_data[rsp->len], path, len);
1295 rsp->len += len;
1296 rsp_data[rsp->len] = '\0';
1297 rsp->len++;
1298
1299 len = strlen(token);
1300 /*
1301 * It is possible for the guest to have chosen a token that will
1302 * not fit (along with the path) into a watch event. We have no
1303 * choice but to drop the event if this is the case.
1304 */
1305 if (rsp->len + len >= XENSTORE_PAYLOAD_MAX) {
1306 return;
1307 }
1308
1309 memcpy(&rsp_data[rsp->len], token, len);
1310 rsp->len += len;
1311 rsp_data[rsp->len] = '\0';
1312 rsp->len++;
1313
1314 s->rsp_pending = true;
1315 }
1316
1317 struct watch_event {
1318 char *path;
1319 char *token;
1320 };
1321
1322 static void free_watch_event(struct watch_event *ev)
1323 {
1324 if (ev) {
1325 g_free(ev->path);
1326 g_free(ev->token);
1327 g_free(ev);
1328 }
1329 }
1330
1331 static void queue_watch(XenXenstoreState *s, const char *path,
1332 const char *token)
1333 {
1334 struct watch_event *ev = g_new0(struct watch_event, 1);
1335
1336 ev->path = g_strdup(path);
1337 ev->token = g_strdup(token);
1338
1339 s->watch_events = g_list_append(s->watch_events, ev);
1340 }
1341
1342 static void fire_watch_cb(void *opaque, const char *path, const char *token)
1343 {
1344 XenXenstoreState *s = opaque;
1345
1346 assert(bql_locked());
1347
1348 /*
1349 * If there's a response pending, we obviously can't scribble over
1350 * it. But if there's a request pending, it has dibs on the buffer
1351 * too.
1352 *
1353 * In the common case of a watch firing due to backend activity
1354 * when the ring was otherwise idle, we should be able to copy the
1355 * strings directly into the rsp_data and thence the actual ring,
1356 * without needing to perform any allocations and queue them.
1357 */
1358 if (s->rsp_pending || req_pending(s)) {
1359 queue_watch(s, path, token);
1360 } else {
1361 deliver_watch(s, path, token);
1362 /*
1363 * Attempt to queue the message into the actual ring, and send
1364 * the event channel notification if any bytes are copied.
1365 */
1366 if (s->rsp_pending && put_rsp(s) > 0) {
1367 xen_be_evtchn_notify(s->eh, s->be_port);
1368 }
1369 }
1370 }
1371
1372 static void process_watch_events(XenXenstoreState *s)
1373 {
1374 struct watch_event *ev = s->watch_events->data;
1375
1376 deliver_watch(s, ev->path, ev->token);
1377
1378 s->watch_events = g_list_remove(s->watch_events, ev);
1379 free_watch_event(ev);
1380 }
1381
1382 static void xen_xenstore_event(void *opaque)
1383 {
1384 XenXenstoreState *s = opaque;
1385 evtchn_port_t port = xen_be_evtchn_pending(s->eh);
1386 unsigned int copied_to, copied_from;
1387 bool processed, notify = false;
1388
1389 if (port != s->be_port) {
1390 return;
1391 }
1392
1393 /* We know this is a no-op. */
1394 xen_be_evtchn_unmask(s->eh, port);
1395
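    /*
     * Keep making passes until no further progress is possible: drain any
     * queued watch events and the pending response into the ring, pull new
     * request bytes out of it, and process a completed request once the
     * response buffer is free. Kick the event channel if any data moved.
     */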
1396 do {
1397 copied_to = copied_from = 0;
1398 processed = false;
1399
1400 if (!s->rsp_pending && s->watch_events) {
1401 process_watch_events(s);
1402 }
1403
1404 if (s->rsp_pending) {
1405 copied_to = put_rsp(s);
1406 }
1407
1408 if (!req_pending(s)) {
1409 copied_from = get_req(s);
1410 }
1411
1412 if (req_pending(s) && !s->rsp_pending && !s->watch_events) {
1413 process_req(s);
1414 processed = true;
1415 }
1416
1417 notify |= copied_to || copied_from;
1418 } while (copied_to || copied_from || processed);
1419
1420 if (notify) {
1421 xen_be_evtchn_notify(s->eh, s->be_port);
1422 }
1423 }
1424
1425 static void alloc_guest_port(XenXenstoreState *s)
1426 {
1427 struct evtchn_alloc_unbound alloc = {
1428 .dom = DOMID_SELF,
1429 .remote_dom = DOMID_QEMU,
1430 };
1431
1432 if (!xen_evtchn_alloc_unbound_op(&alloc)) {
1433 s->guest_port = alloc.port;
1434 }
1435 }
1436
1437 int xen_xenstore_reset(void)
1438 {
1439 XenXenstoreState *s = xen_xenstore_singleton;
1440 int console_port;
1441 GList *perms;
1442 int err;
1443
1444 if (!s) {
1445 return -ENOTSUP;
1446 }
1447
1448 s->req_offset = s->rsp_offset = 0;
1449 s->rsp_pending = false;
1450
1451 if (!memory_region_is_mapped(&s->xenstore_page)) {
1452 uint64_t gpa = XEN_SPECIAL_PFN(XENSTORE) << TARGET_PAGE_BITS;
1453 xen_overlay_do_map_page(&s->xenstore_page, gpa);
1454 }
1455
1456 alloc_guest_port(s);
1457
1458 /*
1459 * As qemu/dom0, bind to the guest's port. For incoming migration, this
1460 * will be unbound as the guest's evtchn table is overwritten. We then
1461 * rebind to the correct guest port in xen_xenstore_post_load().
1462 */
1463 err = xen_be_evtchn_bind_interdomain(s->eh, xen_domid, s->guest_port);
1464 if (err < 0) {
1465 return err;
1466 }
1467 s->be_port = err;
1468
1469 /* Create frontend store nodes */
1470 perms = g_list_append(NULL, xs_perm_as_string(XS_PERM_NONE, DOMID_QEMU));
1471 perms = g_list_append(perms, xs_perm_as_string(XS_PERM_READ, xen_domid));
1472
1473 relpath_printf(s, perms, "store/port", "%u", s->guest_port);
1474 relpath_printf(s, perms, "store/ring-ref", "%lu",
1475 XEN_SPECIAL_PFN(XENSTORE));
1476
1477 console_port = xen_primary_console_get_port();
1478 if (console_port) {
1479 relpath_printf(s, perms, "console/ring-ref", "%lu",
1480 XEN_SPECIAL_PFN(CONSOLE));
1481 relpath_printf(s, perms, "console/port", "%u", console_port);
1482 relpath_printf(s, perms, "console/state", "%u", XenbusStateInitialised);
1483 }
1484
1485 g_list_free_full(perms, g_free);
1486
1487 /*
1488 * We don't actually access the guest's page through the grant, because
1489 * this isn't real Xen, and we can just use the page we gave it in the
1490 * first place. Map the grant anyway, mostly for cosmetic purposes so
1491 * it *looks* like it's in use in the guest-visible grant table.
1492 */
1493 s->gt = qemu_xen_gnttab_open();
1494 uint32_t xs_gntref = GNTTAB_RESERVED_XENSTORE;
1495 s->granted_xs = qemu_xen_gnttab_map_refs(s->gt, 1, xen_domid, &xs_gntref,
1496 PROT_READ | PROT_WRITE);
1497
1498 return 0;
1499 }
1500
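/*
 * Everything below implements xenstore_backend_ops for QEMU's own PV back
 * ends, layered on the same XenstoreImplState but acting as DOMID_QEMU.
 * Watch callbacks registered through xs_be_watch() are not invoked
 * directly; xs_be_watch_cb() queues the event and defers delivery to the
 * be_watch_bh() bottom half.
 */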
1501 struct qemu_xs_handle {
1502 XenstoreImplState *impl;
1503 GList *watches;
1504 QEMUBH *watch_bh;
1505 };
1506
1507 struct qemu_xs_watch {
1508 struct qemu_xs_handle *h;
1509 char *path;
1510 xs_watch_fn fn;
1511 void *opaque;
1512 GList *events;
1513 };
1514
1515 static char *xs_be_get_domain_path(struct qemu_xs_handle *h, unsigned int domid)
1516 {
1517 return g_strdup_printf("/local/domain/%u", domid);
1518 }
1519
1520 static char **xs_be_directory(struct qemu_xs_handle *h, xs_transaction_t t,
1521 const char *path, unsigned int *num)
1522 {
1523 GList *items = NULL, *l;
1524 unsigned int i = 0;
1525 char **items_ret;
1526 int err;
1527
1528 err = xs_impl_directory(h->impl, DOMID_QEMU, t, path, NULL, &items);
1529 if (err) {
1530 errno = err;
1531 return NULL;
1532 }
1533
1534 items_ret = g_new0(char *, g_list_length(items) + 1);
1535 *num = 0;
1536 for (l = items; l; l = l->next) {
1537 items_ret[i++] = l->data;
1538 (*num)++;
1539 }
1540 g_list_free(items);
1541 return items_ret;
1542 }
1543
1544 static void *xs_be_read(struct qemu_xs_handle *h, xs_transaction_t t,
1545 const char *path, unsigned int *len)
1546 {
1547 GByteArray *data = g_byte_array_new();
1548 bool free_segment = false;
1549 int err;
1550
1551 err = xs_impl_read(h->impl, DOMID_QEMU, t, path, data);
1552 if (err) {
1553 free_segment = true;
1554 errno = err;
1555 } else {
1556 if (len) {
1557 *len = data->len;
1558 }
1559 /* The xen-bus-helper code expects to get a NUL-terminated string! */
1560 g_byte_array_append(data, (void *)"", 1);
1561 }
1562
1563 return g_byte_array_free(data, free_segment);
1564 }
1565
1566 static bool xs_be_write(struct qemu_xs_handle *h, xs_transaction_t t,
1567 const char *path, const void *data, unsigned int len)
1568 {
1569 GByteArray *gdata = g_byte_array_new();
1570 int err;
1571
1572 g_byte_array_append(gdata, data, len);
1573 err = xs_impl_write(h->impl, DOMID_QEMU, t, path, gdata);
1574 g_byte_array_unref(gdata);
1575 if (err) {
1576 errno = err;
1577 return false;
1578 }
1579 return true;
1580 }
1581
1582 static bool xs_be_create(struct qemu_xs_handle *h, xs_transaction_t t,
1583 unsigned int owner, unsigned int domid,
1584 unsigned int perms, const char *path)
1585 {
1586 g_autoptr(GByteArray) data = g_byte_array_new();
1587 GList *perms_list = NULL;
1588 int err;
1589
1590 /* mkdir does this */
1591 err = xs_impl_read(h->impl, DOMID_QEMU, t, path, data);
1592 if (err == ENOENT) {
1593 err = xs_impl_write(h->impl, DOMID_QEMU, t, path, data);
1594 }
1595 if (err) {
1596 errno = err;
1597 return false;
1598 }
1599
1600 perms_list = g_list_append(perms_list,
1601 xs_perm_as_string(XS_PERM_NONE, owner));
1602 perms_list = g_list_append(perms_list,
1603 xs_perm_as_string(perms, domid));
1604
1605 err = xs_impl_set_perms(h->impl, DOMID_QEMU, t, path, perms_list);
1606 g_list_free_full(perms_list, g_free);
1607 if (err) {
1608 errno = err;
1609 return false;
1610 }
1611 return true;
1612 }
1613
1614 static bool xs_be_destroy(struct qemu_xs_handle *h, xs_transaction_t t,
1615 const char *path)
1616 {
1617 int err = xs_impl_rm(h->impl, DOMID_QEMU, t, path);
1618 if (err) {
1619 errno = err;
1620 return false;
1621 }
1622 return true;
1623 }
1624
1625 static void be_watch_bh(void *_h)
1626 {
1627 struct qemu_xs_handle *h = _h;
1628 GList *l;
1629
1630 for (l = h->watches; l; l = l->next) {
1631 struct qemu_xs_watch *w = l->data;
1632
1633 while (w->events) {
1634 struct watch_event *ev = w->events->data;
1635
1636 w->fn(w->opaque, ev->path);
1637
1638 w->events = g_list_remove(w->events, ev);
1639 free_watch_event(ev);
1640 }
1641 }
1642 }
1643
1644 static void xs_be_watch_cb(void *opaque, const char *path, const char *token)
1645 {
1646 struct watch_event *ev = g_new0(struct watch_event, 1);
1647 struct qemu_xs_watch *w = opaque;
1648
1649 /* We don't care about the token */
1650 ev->path = g_strdup(path);
1651 w->events = g_list_append(w->events, ev);
1652
1653 qemu_bh_schedule(w->h->watch_bh);
1654 }
1655
1656 static struct qemu_xs_watch *xs_be_watch(struct qemu_xs_handle *h,
1657 const char *path, xs_watch_fn fn,
1658 void *opaque)
1659 {
1660 struct qemu_xs_watch *w = g_new0(struct qemu_xs_watch, 1);
1661 int err;
1662
1663 w->h = h;
1664 w->fn = fn;
1665 w->opaque = opaque;
1666
1667 err = xs_impl_watch(h->impl, DOMID_QEMU, path, NULL, xs_be_watch_cb, w);
1668 if (err) {
1669 errno = err;
1670 g_free(w);
1671 return NULL;
1672 }
1673
1674 w->path = g_strdup(path);
1675 h->watches = g_list_append(h->watches, w);
1676 return w;
1677 }
1678
1679 static void xs_be_unwatch(struct qemu_xs_handle *h, struct qemu_xs_watch *w)
1680 {
1681 xs_impl_unwatch(h->impl, DOMID_QEMU, w->path, NULL, xs_be_watch_cb, w);
1682
1683 h->watches = g_list_remove(h->watches, w);
1684 g_list_free_full(w->events, (GDestroyNotify)free_watch_event);
1685 g_free(w->path);
1686 g_free(w);
1687 }
1688
1689 static xs_transaction_t xs_be_transaction_start(struct qemu_xs_handle *h)
1690 {
1691 unsigned int new_tx = XBT_NULL;
1692 int err = xs_impl_transaction_start(h->impl, DOMID_QEMU, &new_tx);
1693 if (err) {
1694 errno = err;
1695 return XBT_NULL;
1696 }
1697 return new_tx;
1698 }
1699
1700 static bool xs_be_transaction_end(struct qemu_xs_handle *h, xs_transaction_t t,
1701 bool abort)
1702 {
1703 int err = xs_impl_transaction_end(h->impl, DOMID_QEMU, t, !abort);
1704 if (err) {
1705 errno = err;
1706 return false;
1707 }
1708 return true;
1709 }
1710
1711 static struct qemu_xs_handle *xs_be_open(void)
1712 {
1713 XenXenstoreState *s = xen_xenstore_singleton;
1714 struct qemu_xs_handle *h;
1715
1716 if (!s || !s->impl) {
1717 errno = ENOSYS;
1718 return NULL;
1719 }
1720
1721 h = g_new0(struct qemu_xs_handle, 1);
1722 h->impl = s->impl;
1723
1724 h->watch_bh = aio_bh_new(qemu_get_aio_context(), be_watch_bh, h);
1725
1726 return h;
1727 }
1728
1729 static void xs_be_close(struct qemu_xs_handle *h)
1730 {
1731 while (h->watches) {
1732 struct qemu_xs_watch *w = h->watches->data;
1733 xs_be_unwatch(h, w);
1734 }
1735
1736 qemu_bh_delete(h->watch_bh);
1737 g_free(h);
1738 }
1739
1740 static struct xenstore_backend_ops emu_xenstore_backend_ops = {
1741 .open = xs_be_open,
1742 .close = xs_be_close,
1743 .get_domain_path = xs_be_get_domain_path,
1744 .directory = xs_be_directory,
1745 .read = xs_be_read,
1746 .write = xs_be_write,
1747 .create = xs_be_create,
1748 .destroy = xs_be_destroy,
1749 .watch = xs_be_watch,
1750 .unwatch = xs_be_unwatch,
1751 .transaction_start = xs_be_transaction_start,
1752 .transaction_end = xs_be_transaction_end,
1753 };
1754