/*
 * vfio protocol over a UNIX socket device handling.
 *
 * Copyright © 2018, 2021 Oracle and/or its affiliates.
 *
 * SPDX-License-Identifier: GPL-2.0-or-later
 */

#include "qemu/osdep.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "qemu/lockable.h"
#include "qemu/thread.h"

#include "hw/vfio-user/device.h"
#include "hw/vfio-user/trace.h"

/*
 * These are to defend against a malign server trying
 * to force us to run out of memory.
 */
#define VFIO_USER_MAX_REGIONS 100
#define VFIO_USER_MAX_IRQS 50

bool vfio_user_get_device_info(VFIOUserProxy *proxy,
                               struct vfio_device_info *info, Error **errp)
{
    VFIOUserDeviceInfo msg;
    uint32_t argsz = sizeof(msg) - sizeof(msg.hdr);

    memset(&msg, 0, sizeof(msg));
    vfio_user_request_msg(&msg.hdr, VFIO_USER_DEVICE_GET_INFO, sizeof(msg), 0);
    msg.argsz = argsz;

    if (!vfio_user_send_wait(proxy, &msg.hdr, NULL, 0, errp)) {
        return false;
    }

    if (msg.hdr.flags & VFIO_USER_ERROR) {
        error_setg_errno(errp, -msg.hdr.error_reply,
                         "VFIO_USER_DEVICE_GET_INFO failed");
        return false;
    }

    trace_vfio_user_get_info(msg.num_regions, msg.num_irqs);

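    /*
     * The reply fields after the header mirror struct vfio_device_info,
     * so copy argsz bytes starting at msg.argsz into the caller's buffer.
     */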
    memcpy(info, &msg.argsz, argsz);

    /* defend against a malicious server */
    if (info->num_regions > VFIO_USER_MAX_REGIONS ||
        info->num_irqs > VFIO_USER_MAX_IRQS) {
        error_setg_errno(errp, EINVAL, "invalid reply");
        return false;
    }

    return true;
}

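/*
 * Send VFIO_USER_DEVICE_RESET and wait for the reply; errors are
 * reported here rather than returned to the caller.
 */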
void vfio_user_device_reset(VFIOUserProxy *proxy)
{
    Error *local_err = NULL;
    VFIOUserHdr hdr;

    vfio_user_request_msg(&hdr, VFIO_USER_DEVICE_RESET, sizeof(hdr), 0);

    if (!vfio_user_send_wait(proxy, &hdr, NULL, 0, &local_err)) {
        error_prepend(&local_err, "%s: ", __func__);
        error_report_err(local_err);
        return;
    }

    if (hdr.flags & VFIO_USER_ERROR) {
        error_printf("reset reply error %d\n", hdr.error_reply);
    }
}

static int vfio_user_get_region_info(VFIOUserProxy *proxy,
                                     struct vfio_region_info *info,
                                     VFIOUserFDs *fds)
{
    g_autofree VFIOUserRegionInfo *msgp = NULL;
    Error *local_err = NULL;
    uint32_t size;

    /* data returned can be larger than vfio_region_info */
    if (info->argsz < sizeof(*info)) {
        error_printf("vfio_user_get_region_info argsz too small\n");
        return -E2BIG;
    }
    if (fds != NULL && fds->send_fds != 0) {
        error_printf("vfio_user_get_region_info can't send FDs\n");
        return -EINVAL;
    }

    size = info->argsz + sizeof(VFIOUserHdr);
    msgp = g_malloc0(size);

    vfio_user_request_msg(&msgp->hdr, VFIO_USER_DEVICE_GET_REGION_INFO,
                          sizeof(*msgp), 0);
    msgp->argsz = info->argsz;
    msgp->index = info->index;

    if (!vfio_user_send_wait(proxy, &msgp->hdr, fds, size, &local_err)) {
        error_prepend(&local_err, "%s: ", __func__);
        error_report_err(local_err);
        return -EFAULT;
    }

    if (msgp->hdr.flags & VFIO_USER_ERROR) {
        return -msgp->hdr.error_reply;
    }
    trace_vfio_user_get_region_info(msgp->index, msgp->flags, msgp->size);

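    /* the reply fields after the header mirror struct vfio_region_info */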
    memcpy(info, &msgp->argsz, info->argsz);

    /*
     * If at least one region is directly mapped into the VM, then we can no
     * longer rely on the sequential nature of vfio-user request handling to
     * ensure that posted writes are completed before a subsequent read. In
     * this case, disable posted write support. This is a per-device property,
     * not per-region.
     */
    if (info->flags & VFIO_REGION_INFO_FLAG_MMAP) {
        vfio_user_disable_posted_writes(proxy);
    }

    return 0;
}

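/*
 * VFIODeviceIOOps region_info handler.  The VFIOUserFDs below sends no
 * FDs and accepts at most one returned by the server for this region.
 */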
static int vfio_user_device_io_get_region_info(VFIODevice *vbasedev,
                                               struct vfio_region_info *info,
                                               int *fd)
{
    VFIOUserFDs fds = { 0, 1, fd };
    int ret;

    if (info->index >= vbasedev->num_regions) {
        return -EINVAL;
    }

    ret = vfio_user_get_region_info(vbasedev->proxy, info, &fds);
    if (ret) {
        return ret;
    }

    /* check that cap_offset lies within the returned buffer */
    if ((info->flags & VFIO_REGION_INFO_FLAG_CAPS) &&
        (info->cap_offset < sizeof(*info) || info->cap_offset > info->argsz)) {
        return -EINVAL;
    }

    return 0;
}

static int vfio_user_device_io_get_irq_info(VFIODevice *vbasedev,
                                            struct vfio_irq_info *info)
{
    VFIOUserProxy *proxy = vbasedev->proxy;
    Error *local_err = NULL;
    VFIOUserIRQInfo msg;

    memset(&msg, 0, sizeof(msg));
    vfio_user_request_msg(&msg.hdr, VFIO_USER_DEVICE_GET_IRQ_INFO,
                          sizeof(msg), 0);
    msg.argsz = info->argsz;
    msg.index = info->index;

    if (!vfio_user_send_wait(proxy, &msg.hdr, NULL, 0, &local_err)) {
        error_prepend(&local_err, "%s: ", __func__);
        error_report_err(local_err);
        return -EFAULT;
    }

    if (msg.hdr.flags & VFIO_USER_ERROR) {
        return -msg.hdr.error_reply;
    }
    trace_vfio_user_get_irq_info(msg.index, msg.flags, msg.count);

    memcpy(info, &msg.argsz, sizeof(*info));
    return 0;
}

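/*
 * Return the length of the run of consecutive valid (or consecutive
 * invalid) eventfds starting at fdp[cur], capped at max.  A single
 * VFIO_USER_DEVICE_SET_IRQS message carries either all valid or all
 * invalid FDs, never a mix.
 */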
static int irq_howmany(int *fdp, uint32_t cur, uint32_t max)
{
    int n = 0;

    if (fdp[cur] != -1) {
        do {
            n++;
        } while (n < max && fdp[cur + n] != -1);
    } else {
        do {
            n++;
        } while (n < max && fdp[cur + n] == -1);
    }

    return n;
}

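/*
 * Configure device interrupts.  Any eventfds are passed over the socket
 * as ancillary data; if there are more than the server accepts per
 * message, the request is split into multiple messages.
 */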
static int vfio_user_device_io_set_irqs(VFIODevice *vbasedev,
                                        struct vfio_irq_set *irq)
{
    VFIOUserProxy *proxy = vbasedev->proxy;
    g_autofree VFIOUserIRQSet *msgp = NULL;
    uint32_t size, nfds, send_fds, sent_fds, max;
    Error *local_err = NULL;

    if (irq->argsz < sizeof(*irq)) {
        error_printf("vfio_user_set_irqs argsz too small\n");
        return -EINVAL;
    }

    /*
     * Handle the simple case: no eventfds to pass, so the request
     * carries only the header and data payload.
     */
    if ((irq->flags & VFIO_IRQ_SET_DATA_EVENTFD) == 0) {
        size = sizeof(VFIOUserHdr) + irq->argsz;
        msgp = g_malloc0(size);

        vfio_user_request_msg(&msgp->hdr, VFIO_USER_DEVICE_SET_IRQS, size, 0);
        msgp->argsz = irq->argsz;
        msgp->flags = irq->flags;
        msgp->index = irq->index;
        msgp->start = irq->start;
        msgp->count = irq->count;
        trace_vfio_user_set_irqs(msgp->index, msgp->start, msgp->count,
                                 msgp->flags);

        if (!vfio_user_send_wait(proxy, &msgp->hdr, NULL, 0, &local_err)) {
            error_prepend(&local_err, "%s: ", __func__);
            error_report_err(local_err);
            return -EFAULT;
        }

        if (msgp->hdr.flags & VFIO_USER_ERROR) {
            return -msgp->hdr.error_reply;
        }

        return 0;
    }

    /*
     * Calculate the number of FDs to send and adjust argsz.
     */
    nfds = (irq->argsz - sizeof(*irq)) / sizeof(int);
    irq->argsz = sizeof(*irq);
    msgp = g_malloc0(sizeof(*msgp));

    /*
     * Send in chunks if over max_send_fds.
     */
    for (sent_fds = 0; nfds > sent_fds; sent_fds += send_fds) {
        VFIOUserFDs *arg_fds, loop_fds;

        /* must send all valid FDs or all invalid FDs in a single msg */
        max = nfds - sent_fds;
        if (max > proxy->max_send_fds) {
            max = proxy->max_send_fds;
        }
        send_fds = irq_howmany((int *)irq->data, sent_fds, max);

        vfio_user_request_msg(&msgp->hdr, VFIO_USER_DEVICE_SET_IRQS,
                              sizeof(*msgp), 0);
        msgp->argsz = irq->argsz;
        msgp->flags = irq->flags;
        msgp->index = irq->index;
        msgp->start = irq->start + sent_fds;
        msgp->count = send_fds;
        trace_vfio_user_set_irqs(msgp->index, msgp->start, msgp->count,
                                 msgp->flags);

        loop_fds.send_fds = send_fds;
        loop_fds.recv_fds = 0;
        loop_fds.fds = (int *)irq->data + sent_fds;
        arg_fds = loop_fds.fds[0] != -1 ? &loop_fds : NULL;

        if (!vfio_user_send_wait(proxy, &msgp->hdr, arg_fds, 0, &local_err)) {
            error_prepend(&local_err, "%s: ", __func__);
            error_report_err(local_err);
            return -EFAULT;
        }

        if (msgp->hdr.flags & VFIO_USER_ERROR) {
            return -msgp->hdr.error_reply;
        }
    }

    return 0;
}

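/*
 * Read from a device region.  Returns the number of bytes read, or a
 * negative errno; reads larger than the proxy's max_xfer_size are
 * rejected outright.
 */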
static int vfio_user_device_io_region_read(VFIODevice *vbasedev, uint8_t index,
                                           off_t off, uint32_t count,
                                           void *data)
{
    g_autofree VFIOUserRegionRW *msgp = NULL;
    VFIOUserProxy *proxy = vbasedev->proxy;
    int size = sizeof(*msgp) + count;
    Error *local_err = NULL;

    if (count > proxy->max_xfer_size) {
        return -EINVAL;
    }

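    /* allocate room for the reply, which appends up to count bytes of data */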
    msgp = g_malloc0(size);
    vfio_user_request_msg(&msgp->hdr, VFIO_USER_REGION_READ, sizeof(*msgp), 0);
    msgp->offset = off;
    msgp->region = index;
    msgp->count = count;
    trace_vfio_user_region_rw(msgp->region, msgp->offset, msgp->count);

    if (!vfio_user_send_wait(proxy, &msgp->hdr, NULL, size, &local_err)) {
        error_prepend(&local_err, "%s: ", __func__);
        error_report_err(local_err);
        return -EFAULT;
    }

    if (msgp->hdr.flags & VFIO_USER_ERROR) {
        return -msgp->hdr.error_reply;
    } else if (msgp->count > count) {
        return -E2BIG;
    } else {
        memcpy(data, &msgp->data, msgp->count);
    }

    return msgp->count;
}

/*
 * If this is a posted write, and VFIO_PROXY_NO_POST is not set, then we are OK
 * to send the write to the socket without waiting for the server's reply:
 * a subsequent read (of any region) will not pass the posted write, as all
 * messages are handled sequentially.
 */
static int vfio_user_device_io_region_write(VFIODevice *vbasedev, uint8_t index,
                                            off_t off, unsigned count,
                                            void *data, bool post)
{
    VFIOUserRegionRW *msgp = NULL;
    VFIOUserProxy *proxy = vbasedev->proxy;
    int size = sizeof(*msgp) + count;
    Error *local_err = NULL;
    bool can_multi;
    int flags = 0;
    int ret;

    if (count > proxy->max_xfer_size) {
        return -EINVAL;
    }

    if (proxy->flags & VFIO_PROXY_NO_POST) {
        post = false;
    }

    if (post) {
        flags |= VFIO_USER_NO_REPLY;
    }

    /* is this write eligible to be part of a WRITE_MULTI msg? */
    can_multi = (proxy->flags & VFIO_PROXY_USE_MULTI) && post &&
        count <= VFIO_USER_MULTI_DATA;

    /*
     * This should be a rare case, so first check without the lock.
     * If we're wrong, vfio_send_queued() will flush any posted writes
     * we missed here.
     */
    if (proxy->wr_multi != NULL ||
        (proxy->num_outgoing > VFIO_USER_OUT_HIGH && can_multi)) {

        /*
         * Re-check with the lock held:
         *
         * if we're already building a WRITE_MULTI msg, add this write
         * to it if possible, else flush the pending msg before sending
         * the current write;
         *
         * else if the outgoing queue is over the highwater mark, start
         * a new WRITE_MULTI message.
         */
        WITH_QEMU_LOCK_GUARD(&proxy->lock) {
            if (proxy->wr_multi != NULL) {
                if (can_multi) {
                    vfio_user_add_multi(proxy, index, off, count, data);
                    return count;
                }
                vfio_user_flush_multi(proxy);
            } else if (proxy->num_outgoing > VFIO_USER_OUT_HIGH && can_multi) {
                vfio_user_create_multi(proxy);
                vfio_user_add_multi(proxy, index, off, count, data);
                return count;
            }
        }
    }

    msgp = g_malloc0(size);
    vfio_user_request_msg(&msgp->hdr, VFIO_USER_REGION_WRITE, size, flags);
    msgp->offset = off;
    msgp->region = index;
    msgp->count = count;
    memcpy(&msgp->data, data, count);
    trace_vfio_user_region_rw(msgp->region, msgp->offset, msgp->count);

    /* async send will free msg after it's sent */
    if (post) {
        if (!vfio_user_send_async(proxy, &msgp->hdr, NULL, &local_err)) {
            error_prepend(&local_err, "%s: ", __func__);
            error_report_err(local_err);
            return -EFAULT;
        }

        return count;
    }

    if (!vfio_user_send_wait(proxy, &msgp->hdr, NULL, 0, &local_err)) {
        error_prepend(&local_err, "%s: ", __func__);
        error_report_err(local_err);
        g_free(msgp);
        return -EFAULT;
    }

    if (msgp->hdr.flags & VFIO_USER_ERROR) {
        ret = -msgp->hdr.error_reply;
    } else {
        ret = count;
    }

    g_free(msgp);
    return ret;
}

/*
 * Socket-based io_ops
 */
VFIODeviceIOOps vfio_user_device_io_ops_sock = {
    .get_region_info = vfio_user_device_io_get_region_info,
    .get_irq_info = vfio_user_device_io_get_irq_info,
    .set_irqs = vfio_user_device_io_set_irqs,
    .region_read = vfio_user_device_io_region_read,
    .region_write = vfio_user_device_io_region_write,
};