/*
 * vfio protocol over a UNIX socket device handling.
 *
 * Copyright © 2018, 2021 Oracle and/or its affiliates.
 *
 * SPDX-License-Identifier: GPL-2.0-or-later
 */

#include "qemu/osdep.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "qemu/lockable.h"
#include "qemu/thread.h"

#include "hw/vfio-user/device.h"
#include "hw/vfio-user/trace.h"

/*
 * These are to defend against a malign server trying
 * to force us to run out of memory.
 */
#define VFIO_USER_MAX_REGIONS 100
#define VFIO_USER_MAX_IRQS    50

bool vfio_user_get_device_info(VFIOUserProxy *proxy,
                               struct vfio_device_info *info, Error **errp)
{
    VFIOUserDeviceInfo msg;
    uint32_t argsz = sizeof(msg) - sizeof(msg.hdr);

    memset(&msg, 0, sizeof(msg));
    vfio_user_request_msg(&msg.hdr, VFIO_USER_DEVICE_GET_INFO, sizeof(msg), 0);
    msg.argsz = argsz;

    if (!vfio_user_send_wait(proxy, &msg.hdr, NULL, 0, errp)) {
        return false;
    }

    if (msg.hdr.flags & VFIO_USER_ERROR) {
        error_setg_errno(errp, -msg.hdr.error_reply,
                         "VFIO_USER_DEVICE_GET_INFO failed");
        return false;
    }

    trace_vfio_user_get_info(msg.num_regions, msg.num_irqs);

    memcpy(info, &msg.argsz, argsz);

    /* defend against a malicious server */
    if (info->num_regions > VFIO_USER_MAX_REGIONS ||
        info->num_irqs > VFIO_USER_MAX_IRQS) {
        error_setg_errno(errp, EINVAL, "invalid reply");
        return false;
    }

    return true;
}

void vfio_user_device_reset(VFIOUserProxy *proxy)
{
    Error *local_err = NULL;
    VFIOUserHdr hdr;

    vfio_user_request_msg(&hdr, VFIO_USER_DEVICE_RESET, sizeof(hdr), 0);

    if (!vfio_user_send_wait(proxy, &hdr, NULL, 0, &local_err)) {
        error_prepend(&local_err, "%s: ", __func__);
        error_report_err(local_err);
        return;
    }

    if (hdr.flags & VFIO_USER_ERROR) {
        error_printf("reset reply error %d\n", hdr.error_reply);
    }
}

static int vfio_user_get_region_info(VFIOUserProxy *proxy,
                                     struct vfio_region_info *info,
                                     VFIOUserFDs *fds)
{
    g_autofree VFIOUserRegionInfo *msgp = NULL;
    Error *local_err = NULL;
    uint32_t size;

    /* data returned can be larger than vfio_region_info */
    if (info->argsz < sizeof(*info)) {
        error_printf("vfio_user_get_region_info argsz too small\n");
        return -E2BIG;
    }
    if (fds != NULL && fds->send_fds != 0) {
        error_printf("vfio_user_get_region_info can't send FDs\n");
        return -EINVAL;
    }

    size = info->argsz + sizeof(VFIOUserHdr);
    msgp = g_malloc0(size);

    vfio_user_request_msg(&msgp->hdr, VFIO_USER_DEVICE_GET_REGION_INFO,
                          sizeof(*msgp), 0);
    msgp->argsz = info->argsz;
    msgp->index = info->index;

    if (!vfio_user_send_wait(proxy, &msgp->hdr, fds, size, &local_err)) {
        error_prepend(&local_err, "%s: ", __func__);
        error_report_err(local_err);
        return -EFAULT;
    }

    if (msgp->hdr.flags & VFIO_USER_ERROR) {
        return -msgp->hdr.error_reply;
    }
    trace_vfio_user_get_region_info(msgp->index, msgp->flags, msgp->size);

    memcpy(info, &msgp->argsz, info->argsz);

    /*
     * If at least one region is directly mapped into the VM, then we can no
     * longer rely on the sequential nature of vfio-user request handling to
     * ensure that posted writes are completed before a subsequent read. In
     * this case, disable posted write support. This is a per-device property,
     * not per-region.
     */
    if (info->flags & VFIO_REGION_INFO_FLAG_MMAP) {
        vfio_user_disable_posted_writes(proxy);
    }

    return 0;
}
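/*
 * VFIODeviceIOOps implementation: fetch a region's info from the server and
 * validate that any capability chain offset in the reply points inside the
 * returned data before callers walk the chain.
 */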
static int vfio_user_device_io_get_region_info(VFIODevice *vbasedev,
                                               struct vfio_region_info *info,
                                               int *fd)
{
    VFIOUserFDs fds = { 0, 1, fd };
    int ret;

    /* region indices run from 0 to num_regions - 1 */
    if (info->index >= vbasedev->num_regions) {
        return -EINVAL;
    }

    ret = vfio_user_get_region_info(vbasedev->proxy, info, &fds);
    if (ret) {
        return ret;
    }

    /* cap_offset must lie in the variable-sized area after the header */
    if ((info->flags & VFIO_REGION_INFO_FLAG_CAPS) &&
        (info->cap_offset < sizeof(*info) || info->cap_offset > info->argsz)) {
        return -EINVAL;
    }

    return 0;
}

static int vfio_user_device_io_get_irq_info(VFIODevice *vbasedev,
                                            struct vfio_irq_info *info)
{
    VFIOUserProxy *proxy = vbasedev->proxy;
    Error *local_err = NULL;
    VFIOUserIRQInfo msg;

    memset(&msg, 0, sizeof(msg));
    vfio_user_request_msg(&msg.hdr, VFIO_USER_DEVICE_GET_IRQ_INFO,
                          sizeof(msg), 0);
    msg.argsz = info->argsz;
    msg.index = info->index;

    if (!vfio_user_send_wait(proxy, &msg.hdr, NULL, 0, &local_err)) {
        error_prepend(&local_err, "%s: ", __func__);
        error_report_err(local_err);
        return -EFAULT;
    }

    if (msg.hdr.flags & VFIO_USER_ERROR) {
        return -msg.hdr.error_reply;
    }
    trace_vfio_user_get_irq_info(msg.index, msg.flags, msg.count);

    memcpy(info, &msg.argsz, sizeof(*info));
    return 0;
}

/*
 * Return the length of the initial run of entries in fdp[], starting at
 * fdp[cur], that are either all valid FDs or all invalid (-1), capped at max.
 */
static int irq_howmany(int *fdp, uint32_t cur, uint32_t max)
{
    int n = 0;

    if (fdp[cur] != -1) {
        /* count consecutive valid FDs */
        do {
            n++;
        } while (n < max && fdp[cur + n] != -1);
    } else {
        /* count consecutive invalid FDs */
        do {
            n++;
        } while (n < max && fdp[cur + n] == -1);
    }

    return n;
}
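/*
 * Configure device interrupts.  A request with no eventfds maps onto a single
 * VFIO_USER_DEVICE_SET_IRQS message.  A request carrying eventfds may exceed
 * the server's per-message FD limit, so it is sent in chunks of at most
 * max_send_fds entries, where each chunk holds either all valid or all
 * invalid (-1) descriptors.  For example (assuming max_send_fds is not the
 * limiting factor), fds = { 3, 4, -1, -1, 5 } splits into runs of 2, 2 and 1,
 * producing three messages.
 */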
static int vfio_user_device_io_set_irqs(VFIODevice *vbasedev,
                                        struct vfio_irq_set *irq)
{
    VFIOUserProxy *proxy = vbasedev->proxy;
    g_autofree VFIOUserIRQSet *msgp = NULL;
    uint32_t size, nfds, send_fds, sent_fds, max;
    Error *local_err = NULL;

    if (irq->argsz < sizeof(*irq)) {
        error_printf("vfio_user_set_irqs argsz too small\n");
        return -EINVAL;
    }

    /*
     * Handle the simple case: no eventfds to pass along.
     */
    if ((irq->flags & VFIO_IRQ_SET_DATA_EVENTFD) == 0) {
        size = sizeof(VFIOUserHdr) + irq->argsz;
        msgp = g_malloc0(size);

        vfio_user_request_msg(&msgp->hdr, VFIO_USER_DEVICE_SET_IRQS, size, 0);
        msgp->argsz = irq->argsz;
        msgp->flags = irq->flags;
        msgp->index = irq->index;
        msgp->start = irq->start;
        msgp->count = irq->count;
        trace_vfio_user_set_irqs(msgp->index, msgp->start, msgp->count,
                                 msgp->flags);

        if (!vfio_user_send_wait(proxy, &msgp->hdr, NULL, 0, &local_err)) {
            error_prepend(&local_err, "%s: ", __func__);
            error_report_err(local_err);
            return -EFAULT;
        }

        if (msgp->hdr.flags & VFIO_USER_ERROR) {
            return -msgp->hdr.error_reply;
        }

        return 0;
    }

    /*
     * Calculate the number of FDs to send and adjust argsz.
     */
    nfds = (irq->argsz - sizeof(*irq)) / sizeof(int);
    irq->argsz = sizeof(*irq);
    msgp = g_malloc0(sizeof(*msgp));

    /*
     * Send in chunks if over max_send_fds.
     */
    for (sent_fds = 0; nfds > sent_fds; sent_fds += send_fds) {
        VFIOUserFDs *arg_fds, loop_fds;

        /* must send all valid FDs or all invalid FDs in a single message */
        max = nfds - sent_fds;
        if (max > proxy->max_send_fds) {
            max = proxy->max_send_fds;
        }
        send_fds = irq_howmany((int *)irq->data, sent_fds, max);

        vfio_user_request_msg(&msgp->hdr, VFIO_USER_DEVICE_SET_IRQS,
                              sizeof(*msgp), 0);
        msgp->argsz = irq->argsz;
        msgp->flags = irq->flags;
        msgp->index = irq->index;
        msgp->start = irq->start + sent_fds;
        msgp->count = send_fds;
        trace_vfio_user_set_irqs(msgp->index, msgp->start, msgp->count,
                                 msgp->flags);

        loop_fds.send_fds = send_fds;
        loop_fds.recv_fds = 0;
        loop_fds.fds = (int *)irq->data + sent_fds;
        arg_fds = loop_fds.fds[0] != -1 ? &loop_fds : NULL;

        if (!vfio_user_send_wait(proxy, &msgp->hdr, arg_fds, 0, &local_err)) {
            error_prepend(&local_err, "%s: ", __func__);
            error_report_err(local_err);
            return -EFAULT;
        }

        if (msgp->hdr.flags & VFIO_USER_ERROR) {
            return -msgp->hdr.error_reply;
        }
    }

    return 0;
}
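/*
 * Synchronous region read: blocks until the server replies.  The reply may
 * carry fewer bytes than requested, and the actual count is returned to the
 * caller; a reply claiming more bytes than requested is rejected.
 */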
static int vfio_user_device_io_region_read(VFIODevice *vbasedev, uint8_t index,
                                           off_t off, uint32_t count,
                                           void *data)
{
    g_autofree VFIOUserRegionRW *msgp = NULL;
    VFIOUserProxy *proxy = vbasedev->proxy;
    int size = sizeof(*msgp) + count;
    Error *local_err = NULL;

    if (count > proxy->max_xfer_size) {
        return -EINVAL;
    }

    msgp = g_malloc0(size);
    vfio_user_request_msg(&msgp->hdr, VFIO_USER_REGION_READ, sizeof(*msgp), 0);
    msgp->offset = off;
    msgp->region = index;
    msgp->count = count;
    trace_vfio_user_region_rw(msgp->region, msgp->offset, msgp->count);

    if (!vfio_user_send_wait(proxy, &msgp->hdr, NULL, size, &local_err)) {
        error_prepend(&local_err, "%s: ", __func__);
        error_report_err(local_err);
        return -EFAULT;
    }

    if (msgp->hdr.flags & VFIO_USER_ERROR) {
        return -msgp->hdr.error_reply;
    } else if (msgp->count > count) {
        return -E2BIG;
    } else {
        memcpy(data, &msgp->data, msgp->count);
    }

    return msgp->count;
}

/*
 * If this is a posted write, and VFIO_PROXY_NO_POST is not set, then we are OK
 * to send the write to the socket without waiting for the server's reply:
 * a subsequent read (of any region) will not pass the posted write, as all
 * messages are handled sequentially.
 */
static int vfio_user_device_io_region_write(VFIODevice *vbasedev, uint8_t index,
                                            off_t off, unsigned count,
                                            void *data, bool post)
{
    VFIOUserRegionRW *msgp = NULL;
    VFIOUserProxy *proxy = vbasedev->proxy;
    int size = sizeof(*msgp) + count;
    Error *local_err = NULL;
    bool can_multi;
    int flags = 0;
    int ret;

    if (count > proxy->max_xfer_size) {
        return -EINVAL;
    }

    if (proxy->flags & VFIO_PROXY_NO_POST) {
        post = false;
    }

    if (post) {
        flags |= VFIO_USER_NO_REPLY;
    }

    /* is this write eligible to be part of a WRITE_MULTI message? */
    can_multi = (proxy->flags & VFIO_PROXY_USE_MULTI) && post &&
        count <= VFIO_USER_MULTI_DATA;

    /*
     * This should be a rare case, so first check without the lock;
     * if we're wrong, vfio_send_queued() will flush any posted writes
     * we missed here.
     */
    if (proxy->wr_multi != NULL ||
        (proxy->num_outgoing > VFIO_USER_OUT_HIGH && can_multi)) {

        /*
         * Re-check with the lock held.
         *
         * If we are already building a WRITE_MULTI message, add this write
         * to it if possible; otherwise flush the pending message before
         * sending the current write on its own.
         *
         * Else, if the outgoing queue is over the high watermark, start a
         * new WRITE_MULTI message.
         */
        WITH_QEMU_LOCK_GUARD(&proxy->lock) {
            if (proxy->wr_multi != NULL) {
                if (can_multi) {
                    vfio_user_add_multi(proxy, index, off, count, data);
                    return count;
                }
                vfio_user_flush_multi(proxy);
            } else if (proxy->num_outgoing > VFIO_USER_OUT_HIGH && can_multi) {
                vfio_user_create_multi(proxy);
                vfio_user_add_multi(proxy, index, off, count, data);
                return count;
            }
        }
    }

    msgp = g_malloc0(size);
    vfio_user_request_msg(&msgp->hdr, VFIO_USER_REGION_WRITE, size, flags);
    msgp->offset = off;
    msgp->region = index;
    msgp->count = count;
    memcpy(&msgp->data, data, count);
    trace_vfio_user_region_rw(msgp->region, msgp->offset, msgp->count);

    /* async send will free msg after it's sent */
    if (post) {
        if (!vfio_user_send_async(proxy, &msgp->hdr, NULL, &local_err)) {
            error_prepend(&local_err, "%s: ", __func__);
            error_report_err(local_err);
            return -EFAULT;
        }

        return count;
    }

    if (!vfio_user_send_wait(proxy, &msgp->hdr, NULL, 0, &local_err)) {
        error_prepend(&local_err, "%s: ", __func__);
        error_report_err(local_err);
        g_free(msgp);
        return -EFAULT;
    }

    if (msgp->hdr.flags & VFIO_USER_ERROR) {
        ret = -msgp->hdr.error_reply;
    } else {
        ret = count;
    }

    g_free(msgp);
    return ret;
}

/*
 * Socket-based io_ops
 */
VFIODeviceIOOps vfio_user_device_io_ops_sock = {
    .get_region_info = vfio_user_device_io_get_region_info,
    .get_irq_info = vfio_user_device_io_get_irq_info,
    .set_irqs = vfio_user_device_io_set_irqs,
    .region_read = vfio_user_device_io_region_read,
    .region_write = vfio_user_device_io_region_write,
};