/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2011 NetApp, Inc.
 * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
 * All rights reserved.
 */

#include <sys/param.h>
#include <sys/conf.h>
#define EXTERR_CATEGORY EXTERR_CAT_VMM
#include <sys/exterrvar.h>
#include <sys/fcntl.h>
#include <sys/ioccom.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/module.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/resourcevar.h>
#include <sys/smp.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/ucred.h>
#include <sys/uio.h>

#include <machine/vmm.h>

#include <vm/vm.h>
#include <vm/vm_object.h>

#include <dev/vmm/vmm_dev.h>
#include <dev/vmm/vmm_mem.h>
#include <dev/vmm/vmm_stat.h>
#include <dev/vmm/vmm_vm.h>

#ifdef __amd64__
#ifdef COMPAT_FREEBSD12
struct vm_memseg_12 {
	int		segid;
	size_t		len;
	char		name[64];
};
_Static_assert(sizeof(struct vm_memseg_12) == 80, "COMPAT_FREEBSD12 ABI");

#define	VM_ALLOC_MEMSEG_12	\
	_IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg_12)
#define	VM_GET_MEMSEG_12	\
	_IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg_12)
#endif /* COMPAT_FREEBSD12 */
#ifdef COMPAT_FREEBSD14
struct vm_memseg_14 {
	int		segid;
	size_t		len;
	char		name[VM_MAX_SUFFIXLEN + 1];
};
_Static_assert(sizeof(struct vm_memseg_14) == (VM_MAX_SUFFIXLEN + 1 + 16),
    "COMPAT_FREEBSD14 ABI");

#define	VM_ALLOC_MEMSEG_14	\
	_IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg_14)
#define	VM_GET_MEMSEG_14	\
	_IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg_14)
#endif /* COMPAT_FREEBSD14 */
#endif /* __amd64__ */

struct devmem_softc {
	int	segid;
	char	*name;
	struct cdev *cdev;
	struct vmmdev_softc *sc;
	SLIST_ENTRY(devmem_softc) link;
};

struct vmmdev_softc {
	struct vm	*vm;	/* vm instance cookie */
	struct cdev	*cdev;
	struct ucred	*ucred;
	SLIST_ENTRY(vmmdev_softc) link;
	LIST_ENTRY(vmmdev_softc) priv_link;
	SLIST_HEAD(, devmem_softc) devmem;
	int		flags;
};

struct vmmctl_priv {
	LIST_HEAD(, vmmdev_softc) softcs;
};

static bool vmm_initialized = false;

static SLIST_HEAD(, vmmdev_softc) head;

static unsigned int pr_allow_vmm_flag, pr_allow_vmm_ppt_flag;
static struct sx vmmdev_mtx;
SX_SYSINIT(vmmdev_mtx, &vmmdev_mtx, "vmm device mutex");

static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev");

SYSCTL_DECL(_hw_vmm);

u_int vm_maxcpu;
SYSCTL_UINT(_hw_vmm, OID_AUTO, maxcpu, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
    &vm_maxcpu, 0, "Maximum number of vCPUs");

u_int vm_maxvmms;
SYSCTL_UINT(_hw_vmm, OID_AUTO, maxvmms, CTLFLAG_RWTUN,
    &vm_maxvmms, 0, "Maximum number of VMM instances per user");

static void devmem_destroy(void *arg);
static int devmem_create_cdev(struct vmmdev_softc *sc, int id, char *devmem);
static void vmmdev_destroy(struct vmmdev_softc *sc);

static int
vmm_priv_check(struct ucred *ucred)
{
	if (jailed(ucred) &&
	    (ucred->cr_prison->pr_allow & pr_allow_vmm_flag) == 0)
		return (EPERM);

	return (0);
}

static int
vcpu_lock_one(struct vcpu *vcpu)
{
	return (vcpu_set_state(vcpu, VCPU_FROZEN, true));
}

static void
vcpu_unlock_one(struct vcpu *vcpu)
{
	enum vcpu_state state;

	state = vcpu_get_state(vcpu, NULL);
	if (state != VCPU_FROZEN) {
		panic("vcpu %s(%d) has invalid state %d",
		    vm_name(vcpu_vm(vcpu)), vcpu_vcpuid(vcpu), state);
	}

	vcpu_set_state(vcpu, VCPU_IDLE, false);
}

static int
vcpu_lock_all(struct vmmdev_softc *sc)
{
	int error;

	/*
	 * Serialize vcpu_lock_all() callers.  Individual vCPUs are not locked
	 * in a consistent order so we need to serialize to avoid deadlocks.
	 */
	vm_lock_vcpus(sc->vm);
	error = vcpu_set_state_all(sc->vm, VCPU_FROZEN);
	if (error != 0)
		vm_unlock_vcpus(sc->vm);
	return (error);
}

static void
vcpu_unlock_all(struct vmmdev_softc *sc)
{
	struct vcpu *vcpu;
	uint16_t i, maxcpus;

	maxcpus = vm_get_maxcpus(sc->vm);
	for (i = 0; i < maxcpus; i++) {
		vcpu = vm_vcpu(sc->vm, i);
		if (vcpu == NULL)
			continue;
		vcpu_unlock_one(vcpu);
	}
	vm_unlock_vcpus(sc->vm);
}

static struct vmmdev_softc *
vmmdev_lookup(const char *name, struct ucred *cred)
{
	struct vmmdev_softc *sc;

	sx_assert(&vmmdev_mtx, SA_XLOCKED);

	SLIST_FOREACH(sc, &head, link) {
		if (strcmp(name, vm_name(sc->vm)) == 0)
			break;
	}

	if (sc == NULL)
		return (NULL);

	if (cr_cansee(cred, sc->ucred))
		return (NULL);

	return (sc);
}

static struct vmmdev_softc *
vmmdev_lookup2(struct cdev *cdev)
{
	return (cdev->si_drv1);
}

static int
vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags)
{
	int error, off, c, prot;
	vm_paddr_t gpa, maxaddr;
	void *hpa, *cookie;
	struct vmmdev_softc *sc;

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL)
		return (ENXIO);

	/*
	 * Get a read lock on the guest memory map.
	 */
	vm_slock_memsegs(sc->vm);

	error = 0;
	prot = (uio->uio_rw == UIO_WRITE ? VM_PROT_WRITE : VM_PROT_READ);
	maxaddr = vmm_sysmem_maxaddr(sc->vm);
	while (uio->uio_resid > 0 && error == 0) {
		gpa = uio->uio_offset;
		off = gpa & PAGE_MASK;
		c = min(uio->uio_resid, PAGE_SIZE - off);

		/*
		 * The VM has a hole in its physical memory map. If we want to
		 * use 'dd' to inspect memory beyond the hole we need to
		 * provide bogus data for memory that lies in the hole.
		 *
		 * Since this device does not support lseek(2), dd(1) will
		 * read(2) blocks of data to simulate the lseek(2).
		 */
		hpa = vm_gpa_hold_global(sc->vm, gpa, c, prot, &cookie);
		if (hpa == NULL) {
			if (uio->uio_rw == UIO_READ && gpa < maxaddr)
				error = uiomove(__DECONST(void *, zero_region),
				    c, uio);
			else
				error = EFAULT;
		} else {
			error = uiomove(hpa, c, uio);
			vm_gpa_release(cookie);
		}
	}
	vm_unlock_memsegs(sc->vm);
	return (error);
}
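
/*
 * For example, guest memory can be inspected with dd(1) (the VM name is
 * hypothetical):
 *
 *	dd if=/dev/vmm/testvm bs=4k skip=256 count=1 | hexdump -C
 *
 * reads the page at guest-physical address 1MB; since the device does not
 * support lseek(2), dd(1) simulates the seek with read(2) calls as noted
 * above.
 */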

CTASSERT(sizeof(((struct vm_memseg *)0)->name) >= VM_MAX_SUFFIXLEN + 1);

static int
get_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len)
{
	struct devmem_softc *dsc;
	int error;
	bool sysmem;

	error = vm_get_memseg(sc->vm, mseg->segid, &mseg->len, &sysmem, NULL);
	if (error || mseg->len == 0)
		return (error);

	if (!sysmem) {
		SLIST_FOREACH(dsc, &sc->devmem, link) {
			if (dsc->segid == mseg->segid)
				break;
		}
		KASSERT(dsc != NULL, ("%s: devmem segment %d not found",
		    __func__, mseg->segid));
		error = copystr(dsc->name, mseg->name, len, NULL);
	} else {
		bzero(mseg->name, len);
	}

	return (error);
}

static int
alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len,
    struct domainset *domainset)
{
	char *name;
	int error;
	bool sysmem;

	error = 0;
	name = NULL;
	sysmem = true;

	/*
	 * The allocation is lengthened by 1 to hold a terminating NUL.  It'll
	 * be stripped off when devfs processes the full string.
	 */
	if (VM_MEMSEG_NAME(mseg)) {
		sysmem = false;
		name = malloc(len, M_VMMDEV, M_WAITOK);
		error = copystr(mseg->name, name, len, NULL);
		if (error)
			goto done;
	}
	error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem,
	    domainset);
	if (error)
		goto done;

	if (VM_MEMSEG_NAME(mseg)) {
		error = devmem_create_cdev(sc, mseg->segid, name);
		if (error)
			vm_free_memseg(sc->vm, mseg->segid);
		else
			name = NULL;	/* freed when 'cdev' is destroyed */
	}
done:
	free(name, M_VMMDEV);
	return (error);
}

#if defined(__amd64__) && \
    (defined(COMPAT_FREEBSD14) || defined(COMPAT_FREEBSD12))
/*
 * Translate pre-15.0 memory segment identifiers into their 15.0 counterparts.
 */
static void
adjust_segid(struct vm_memseg *mseg)
{
	if (mseg->segid != VM_SYSMEM) {
		mseg->segid += (VM_BOOTROM - 1);
	}
}
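
/*
 * For example, assuming the pre-15.0 layout placed the first non-system
 * segment at identifier 1, adding (VM_BOOTROM - 1) maps it to the current
 * VM_BOOTROM value and shifts any higher identifiers by the same amount.
 */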
#endif

static int
vm_get_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
    uint64_t *regval)
{
	int error, i;

	error = 0;
	for (i = 0; i < count; i++) {
		error = vm_get_register(vcpu, regnum[i], &regval[i]);
		if (error)
			break;
	}
	return (error);
}

static int
vm_set_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
    uint64_t *regval)
{
	int error, i;

	error = 0;
	for (i = 0; i < count; i++) {
		error = vm_set_register(vcpu, regnum[i], regval[i]);
		if (error)
			break;
	}
	return (error);
}

static int
vmmdev_open(struct cdev *dev, int flags, int fmt, struct thread *td)
{
	int error;

	/*
	 * A jail without vmm access shouldn't be able to access vmm device
	 * files at all, but check here just to be thorough.
	 */
	error = vmm_priv_check(td->td_ucred);
	if (error != 0)
		return (error);

	return (0);
}

static const struct vmmdev_ioctl vmmdev_ioctls[] = {
	VMMDEV_IOCTL(VM_GET_REGISTER, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_SET_REGISTER, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_GET_REGISTER_SET, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_SET_REGISTER_SET, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_GET_CAPABILITY, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_SET_CAPABILITY, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_ACTIVATE_CPU, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_INJECT_EXCEPTION, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_STATS, VMMDEV_IOCTL_LOCK_ONE_VCPU),
	VMMDEV_IOCTL(VM_STAT_DESC, 0),

#ifdef __amd64__
#ifdef COMPAT_FREEBSD12
	VMMDEV_IOCTL(VM_ALLOC_MEMSEG_12,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
#endif
#ifdef COMPAT_FREEBSD14
	VMMDEV_IOCTL(VM_ALLOC_MEMSEG_14,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
#endif
#endif /* __amd64__ */
	VMMDEV_IOCTL(VM_ALLOC_MEMSEG,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
	VMMDEV_IOCTL(VM_MMAP_MEMSEG,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
	VMMDEV_IOCTL(VM_MUNMAP_MEMSEG,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
	VMMDEV_IOCTL(VM_REINIT,
	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),

#ifdef __amd64__
#if defined(COMPAT_FREEBSD12)
	VMMDEV_IOCTL(VM_GET_MEMSEG_12, VMMDEV_IOCTL_SLOCK_MEMSEGS),
#endif
#ifdef COMPAT_FREEBSD14
	VMMDEV_IOCTL(VM_GET_MEMSEG_14, VMMDEV_IOCTL_SLOCK_MEMSEGS),
#endif
#endif /* __amd64__ */
	VMMDEV_IOCTL(VM_GET_MEMSEG, VMMDEV_IOCTL_SLOCK_MEMSEGS),
	VMMDEV_IOCTL(VM_MMAP_GETNEXT, VMMDEV_IOCTL_SLOCK_MEMSEGS),

	VMMDEV_IOCTL(VM_SUSPEND_CPU, VMMDEV_IOCTL_MAYBE_ALLOC_VCPU),
	VMMDEV_IOCTL(VM_RESUME_CPU, VMMDEV_IOCTL_MAYBE_ALLOC_VCPU),

	VMMDEV_IOCTL(VM_SUSPEND, 0),
	VMMDEV_IOCTL(VM_GET_CPUS, 0),
	VMMDEV_IOCTL(VM_GET_TOPOLOGY, 0),
	VMMDEV_IOCTL(VM_SET_TOPOLOGY, 0),
};

static int
vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
    struct thread *td)
{
	struct vmmdev_softc *sc;
	struct vcpu *vcpu;
	const struct vmmdev_ioctl *ioctl;
	struct vm_memseg *mseg;
	int error, vcpuid;

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL)
		return (ENXIO);

	ioctl = NULL;
	for (size_t i = 0; i < nitems(vmmdev_ioctls); i++) {
		if (vmmdev_ioctls[i].cmd == cmd) {
			ioctl = &vmmdev_ioctls[i];
			break;
		}
	}
	if (ioctl == NULL) {
		for (size_t i = 0; i < vmmdev_machdep_ioctl_count; i++) {
			if (vmmdev_machdep_ioctls[i].cmd == cmd) {
				ioctl = &vmmdev_machdep_ioctls[i];
				break;
			}
		}
	}
	if (ioctl == NULL)
		return (ENOTTY);

	if ((ioctl->flags & VMMDEV_IOCTL_PPT) != 0) {
		if (jailed(td->td_ucred) &&
		    (td->td_ucred->cr_prison->pr_allow &
		    pr_allow_vmm_ppt_flag) == 0)
			return (EPERM);
		error = priv_check(td, PRIV_VMM_PPTDEV);
		if (error != 0)
			return (error);
	}

	if ((ioctl->flags & VMMDEV_IOCTL_XLOCK_MEMSEGS) != 0)
		vm_xlock_memsegs(sc->vm);
	else if ((ioctl->flags & VMMDEV_IOCTL_SLOCK_MEMSEGS) != 0)
		vm_slock_memsegs(sc->vm);

	vcpu = NULL;
	vcpuid = -1;
	if ((ioctl->flags & (VMMDEV_IOCTL_LOCK_ONE_VCPU |
	    VMMDEV_IOCTL_ALLOC_VCPU | VMMDEV_IOCTL_MAYBE_ALLOC_VCPU)) != 0) {
		vcpuid = *(int *)data;
		if (vcpuid == -1) {
			if ((ioctl->flags &
			    VMMDEV_IOCTL_MAYBE_ALLOC_VCPU) == 0) {
				error = EINVAL;
				goto lockfail;
			}
		} else {
			vcpu = vm_alloc_vcpu(sc->vm, vcpuid);
			if (vcpu == NULL) {
				error = EINVAL;
				goto lockfail;
			}
			if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ONE_VCPU) != 0) {
				error = vcpu_lock_one(vcpu);
				if (error)
					goto lockfail;
			}
		}
	}
	if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ALL_VCPUS) != 0) {
		error = vcpu_lock_all(sc);
		if (error)
			goto lockfail;
	}

	switch (cmd) {
	case VM_SUSPEND: {
		struct vm_suspend *vmsuspend;

		vmsuspend = (struct vm_suspend *)data;
		error = vm_suspend(sc->vm, vmsuspend->how);
		break;
	}
	case VM_REINIT:
		error = vm_reinit(sc->vm);
		break;
	case VM_STAT_DESC: {
		struct vm_stat_desc *statdesc;

		statdesc = (struct vm_stat_desc *)data;
		error = vmm_stat_desc_copy(statdesc->index, statdesc->desc,
		    sizeof(statdesc->desc));
		break;
	}
	case VM_STATS: {
		struct vm_stats *vmstats;

		vmstats = (struct vm_stats *)data;
		getmicrotime(&vmstats->tv);
		error = vmm_stat_copy(vcpu, vmstats->index,
		    nitems(vmstats->statbuf), &vmstats->num_entries,
		    vmstats->statbuf);
		break;
	}
	case VM_MMAP_GETNEXT: {
		struct vm_memmap *mm;

		mm = (struct vm_memmap *)data;
		error = vm_mmap_getnext(sc->vm, &mm->gpa, &mm->segid,
		    &mm->segoff, &mm->len, &mm->prot, &mm->flags);
		break;
	}
	case VM_MMAP_MEMSEG: {
		struct vm_memmap *mm;

		mm = (struct vm_memmap *)data;
		error = vm_mmap_memseg(sc->vm, mm->gpa, mm->segid, mm->segoff,
		    mm->len, mm->prot, mm->flags);
		break;
	}
	case VM_MUNMAP_MEMSEG: {
		struct vm_munmap *mu;

		mu = (struct vm_munmap *)data;
		error = vm_munmap_memseg(sc->vm, mu->gpa, mu->len);
		break;
	}
#ifdef __amd64__
#ifdef COMPAT_FREEBSD12
	case VM_ALLOC_MEMSEG_12:
		mseg = (struct vm_memseg *)data;

		adjust_segid(mseg);
		error = alloc_memseg(sc, mseg,
		    sizeof(((struct vm_memseg_12 *)0)->name), NULL);
		break;
	case VM_GET_MEMSEG_12:
		mseg = (struct vm_memseg *)data;

		adjust_segid(mseg);
		error = get_memseg(sc, mseg,
		    sizeof(((struct vm_memseg_12 *)0)->name));
		break;
#endif /* COMPAT_FREEBSD12 */
#ifdef COMPAT_FREEBSD14
	case VM_ALLOC_MEMSEG_14:
		mseg = (struct vm_memseg *)data;

		adjust_segid(mseg);
		error = alloc_memseg(sc, mseg,
		    sizeof(((struct vm_memseg_14 *)0)->name), NULL);
		break;
	case VM_GET_MEMSEG_14:
		mseg = (struct vm_memseg *)data;

		adjust_segid(mseg);
		error = get_memseg(sc, mseg,
		    sizeof(((struct vm_memseg_14 *)0)->name));
		break;
#endif /* COMPAT_FREEBSD14 */
#endif /* __amd64__ */
	case VM_ALLOC_MEMSEG: {
		domainset_t *mask;
		struct domainset *domainset, domain;

		domainset = NULL;
		mseg = (struct vm_memseg *)data;
		if (mseg->ds_policy != DOMAINSET_POLICY_INVALID &&
		    mseg->ds_mask != NULL) {
			if (mseg->ds_mask_size < sizeof(domainset_t) ||
			    mseg->ds_mask_size > DOMAINSET_MAXSIZE / NBBY) {
				error = ERANGE;
				break;
			}
			memset(&domain, 0, sizeof(domain));
			mask = malloc(mseg->ds_mask_size, M_VMMDEV, M_WAITOK);
			error = copyin(mseg->ds_mask, mask,
			    mseg->ds_mask_size);
			if (error) {
				free(mask, M_VMMDEV);
				break;
			}
			error = domainset_populate(&domain, mask,
			    mseg->ds_policy, mseg->ds_mask_size);
			free(mask, M_VMMDEV);
			if (error)
				break;
			domainset = domainset_create(&domain);
			if (domainset == NULL) {
				error = EINVAL;
				break;
			}
		}
		error = alloc_memseg(sc, mseg, sizeof(mseg->name), domainset);
		break;
	}
	case VM_GET_MEMSEG:
		error = get_memseg(sc, (struct vm_memseg *)data,
		    sizeof(((struct vm_memseg *)0)->name));
		break;
	case VM_GET_REGISTER: {
		struct vm_register *vmreg;

		vmreg = (struct vm_register *)data;
		error = vm_get_register(vcpu, vmreg->regnum, &vmreg->regval);
		break;
	}
	case VM_SET_REGISTER: {
		struct vm_register *vmreg;

		vmreg = (struct vm_register *)data;
		error = vm_set_register(vcpu, vmreg->regnum, vmreg->regval);
		break;
	}
	case VM_GET_REGISTER_SET: {
		struct vm_register_set *vmregset;
		uint64_t *regvals;
		int *regnums;

		vmregset = (struct vm_register_set *)data;
		if (vmregset->count > VM_REG_LAST) {
			error = EINVAL;
			break;
		}
		regvals = mallocarray(vmregset->count, sizeof(regvals[0]),
		    M_VMMDEV, M_WAITOK);
		regnums = mallocarray(vmregset->count, sizeof(regnums[0]),
		    M_VMMDEV, M_WAITOK);
		error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
		    vmregset->count);
		if (error == 0)
			error = vm_get_register_set(vcpu,
			    vmregset->count, regnums, regvals);
		if (error == 0)
			error = copyout(regvals, vmregset->regvals,
			    sizeof(regvals[0]) * vmregset->count);
		free(regvals, M_VMMDEV);
		free(regnums, M_VMMDEV);
		break;
	}
	case VM_SET_REGISTER_SET: {
		struct vm_register_set *vmregset;
		uint64_t *regvals;
		int *regnums;

		vmregset = (struct vm_register_set *)data;
		if (vmregset->count > VM_REG_LAST) {
			error = EINVAL;
			break;
		}
		regvals = mallocarray(vmregset->count, sizeof(regvals[0]),
		    M_VMMDEV, M_WAITOK);
		regnums = mallocarray(vmregset->count, sizeof(regnums[0]),
		    M_VMMDEV, M_WAITOK);
		error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
		    vmregset->count);
		if (error == 0)
			error = copyin(vmregset->regvals, regvals,
			    sizeof(regvals[0]) * vmregset->count);
		if (error == 0)
			error = vm_set_register_set(vcpu,
			    vmregset->count, regnums, regvals);
		free(regvals, M_VMMDEV);
		free(regnums, M_VMMDEV);
		break;
	}
	case VM_GET_CAPABILITY: {
		struct vm_capability *vmcap;

		vmcap = (struct vm_capability *)data;
		error = vm_get_capability(vcpu, vmcap->captype,
		    &vmcap->capval);
		break;
	}
	case VM_SET_CAPABILITY: {
		struct vm_capability *vmcap;

		vmcap = (struct vm_capability *)data;
		error = vm_set_capability(vcpu, vmcap->captype, vmcap->capval);
		break;
	}
	case VM_ACTIVATE_CPU:
		error = vm_activate_cpu(vcpu);
		break;
	case VM_GET_CPUS: {
		struct vm_cpuset *vm_cpuset;
		cpuset_t *cpuset;
		int size;

		error = 0;
		vm_cpuset = (struct vm_cpuset *)data;
		size = vm_cpuset->cpusetsize;
		if (size < 1 || size > CPU_MAXSIZE / NBBY) {
			error = ERANGE;
			break;
		}
		cpuset = malloc(max(size, sizeof(cpuset_t)), M_TEMP,
		    M_WAITOK | M_ZERO);
		if (vm_cpuset->which == VM_ACTIVE_CPUS)
			*cpuset = vm_active_cpus(sc->vm);
		else if (vm_cpuset->which == VM_SUSPENDED_CPUS)
			*cpuset = vm_suspended_cpus(sc->vm);
		else if (vm_cpuset->which == VM_DEBUG_CPUS)
			*cpuset = vm_debug_cpus(sc->vm);
		else
			error = EINVAL;
		if (error == 0 && size < howmany(CPU_FLS(cpuset), NBBY))
			error = ERANGE;
		if (error == 0)
			error = copyout(cpuset, vm_cpuset->cpus, size);
		free(cpuset, M_TEMP);
		break;
	}
	case VM_SUSPEND_CPU:
		error = vm_suspend_cpu(sc->vm, vcpu);
		break;
	case VM_RESUME_CPU:
		error = vm_resume_cpu(sc->vm, vcpu);
		break;
	case VM_SET_TOPOLOGY: {
		struct vm_cpu_topology *topology;

		topology = (struct vm_cpu_topology *)data;
		error = vm_set_topology(sc->vm, topology->sockets,
		    topology->cores, topology->threads, topology->maxcpus);
		break;
	}
	case VM_GET_TOPOLOGY: {
		struct vm_cpu_topology *topology;

		topology = (struct vm_cpu_topology *)data;
		vm_get_topology(sc->vm, &topology->sockets, &topology->cores,
		    &topology->threads, &topology->maxcpus);
		error = 0;
		break;
	}
	default:
		error = vmmdev_machdep_ioctl(sc->vm, vcpu, cmd, data, fflag,
		    td);
		break;
	}

	if ((ioctl->flags &
	    (VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_SLOCK_MEMSEGS)) != 0)
		vm_unlock_memsegs(sc->vm);
	if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ALL_VCPUS) != 0)
		vcpu_unlock_all(sc);
	else if ((ioctl->flags & VMMDEV_IOCTL_LOCK_ONE_VCPU) != 0)
		vcpu_unlock_one(vcpu);

	/*
	 * Make sure that no handler returns a kernel-internal
	 * error value to userspace.
	 */
	KASSERT(error == ERESTART || error >= 0,
	    ("vmmdev_ioctl: invalid error return %d", error));
	return (error);

lockfail:
	if ((ioctl->flags &
	    (VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_SLOCK_MEMSEGS)) != 0)
		vm_unlock_memsegs(sc->vm);
	return (error);
}

static int
vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t mapsize,
    struct vm_object **objp, int nprot)
{
	struct vmmdev_softc *sc;
	vm_paddr_t gpa;
	size_t len;
	vm_ooffset_t segoff, first, last;
	int error, found, segid;
	bool sysmem;

	first = *offset;
	last = first + mapsize;
	if ((nprot & PROT_EXEC) || first < 0 || first >= last)
		return (EINVAL);

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL) {
		/* virtual machine is in the process of being created */
		return (EINVAL);
	}

	/*
	 * Get a read lock on the guest memory map.
	 */
	vm_slock_memsegs(sc->vm);

	gpa = 0;
	found = 0;
	while (!found) {
		error = vm_mmap_getnext(sc->vm, &gpa, &segid, &segoff, &len,
		    NULL, NULL);
		if (error)
			break;

		if (first >= gpa && last <= gpa + len)
			found = 1;
		else
			gpa += len;
	}

	if (found) {
		error = vm_get_memseg(sc->vm, segid, &len, &sysmem, objp);
		KASSERT(error == 0 && *objp != NULL,
		    ("%s: invalid memory segment %d", __func__, segid));
		if (sysmem) {
			vm_object_reference(*objp);
			*offset = segoff + (first - gpa);
		} else {
			error = EINVAL;
		}
	}
	vm_unlock_memsegs(sc->vm);
	return (error);
}
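
/*
 * A userspace sketch of the mapping implemented above (the VM name is
 * hypothetical; error handling omitted):
 *
 *	int fd = open("/dev/vmm/testvm", O_RDWR);
 *	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED,
 *	    fd, gpa);
 *
 * The file offset is interpreted as a guest-physical address and must fall
 * entirely within a single sysmem mapping; PROT_EXEC is rejected.
 */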

static void
vmmdev_destroy(struct vmmdev_softc *sc)
{
	struct devmem_softc *dsc;
	int error __diagused;

	KASSERT(sc->cdev == NULL, ("%s: cdev not free", __func__));
	KASSERT(sc->ucred != NULL, ("%s: missing ucred", __func__));

	/*
	 * Destroy all cdevs:
	 *
	 * - any new operations on the 'cdev' will return an error (ENXIO).
	 *
	 * - the 'devmem' cdevs are destroyed before the virtual machine 'cdev'
	 */
	SLIST_FOREACH(dsc, &sc->devmem, link) {
		KASSERT(dsc->cdev != NULL, ("devmem cdev already destroyed"));
		devmem_destroy(dsc);
	}

	vm_disable_vcpu_creation(sc->vm);
	error = vcpu_lock_all(sc);
	KASSERT(error == 0, ("%s: error %d freezing vcpus", __func__, error));
	vm_unlock_vcpus(sc->vm);

	while ((dsc = SLIST_FIRST(&sc->devmem)) != NULL) {
		KASSERT(dsc->cdev == NULL, ("%s: devmem not free", __func__));
		SLIST_REMOVE_HEAD(&sc->devmem, link);
		free(dsc->name, M_VMMDEV);
		free(dsc, M_VMMDEV);
	}

	vm_destroy(sc->vm);

	chgvmmcnt(sc->ucred->cr_ruidinfo, -1, 0);
	crfree(sc->ucred);

	sx_xlock(&vmmdev_mtx);
	SLIST_REMOVE(&head, sc, vmmdev_softc, link);
	if ((sc->flags & VMMCTL_CREATE_DESTROY_ON_CLOSE) != 0)
		LIST_REMOVE(sc, priv_link);
	sx_xunlock(&vmmdev_mtx);
	wakeup(sc);
	free(sc, M_VMMDEV);
}

static int
vmmdev_lookup_and_destroy(const char *name, struct ucred *cred)
{
	struct cdev *cdev;
	struct vmmdev_softc *sc;
	int error;

	sx_xlock(&vmmdev_mtx);
	sc = vmmdev_lookup(name, cred);
	if (sc == NULL || sc->cdev == NULL) {
		sx_xunlock(&vmmdev_mtx);
		return (EINVAL);
	}

	/*
	 * Only the creator of a VM or a privileged user can destroy it.
	 */
	if ((cred->cr_uid != sc->ucred->cr_uid ||
	    cred->cr_prison != sc->ucred->cr_prison) &&
	    (error = priv_check_cred(cred, PRIV_VMM_DESTROY)) != 0) {
		sx_xunlock(&vmmdev_mtx);
		return (error);
	}

	/*
	 * Setting 'sc->cdev' to NULL is used to indicate that the VM
	 * is scheduled for destruction.
	 */
	cdev = sc->cdev;
	sc->cdev = NULL;
	sx_xunlock(&vmmdev_mtx);

	(void)vm_suspend(sc->vm, VM_SUSPEND_DESTROY);
	destroy_dev(cdev);
	vmmdev_destroy(sc);

	return (0);
}

static int
sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
{
	char *buf;
	int error, buflen;

	error = vmm_priv_check(req->td->td_ucred);
	if (error)
		return (error);

	buflen = VM_MAX_NAMELEN + 1;
	buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
	error = sysctl_handle_string(oidp, buf, buflen, req);
	if (error == 0 && req->newptr != NULL)
		error = vmmdev_lookup_and_destroy(buf, req->td->td_ucred);
	free(buf, M_VMMDEV);
	return (error);
}
SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy,
    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
    NULL, 0, sysctl_vmm_destroy, "A",
    "Destroy a vmm(4) instance (legacy interface)");

static struct cdevsw vmmdevsw = {
	.d_name = "vmmdev",
	.d_version = D_VERSION,
	.d_open = vmmdev_open,
	.d_ioctl = vmmdev_ioctl,
	.d_mmap_single = vmmdev_mmap_single,
	.d_read = vmmdev_rw,
	.d_write = vmmdev_rw,
};

static struct vmmdev_softc *
vmmdev_alloc(struct vm *vm, struct ucred *cred)
{
	struct vmmdev_softc *sc;

	sc = malloc(sizeof(*sc), M_VMMDEV, M_WAITOK | M_ZERO);
	SLIST_INIT(&sc->devmem);
	sc->vm = vm;
	sc->ucred = crhold(cred);
	return (sc);
}

static int
vmmdev_create(const char *name, uint32_t flags, struct ucred *cred)
{
	struct make_dev_args mda;
	struct cdev *cdev;
	struct vmmdev_softc *sc;
	struct vmmctl_priv *priv;
	struct vm *vm;
	int error;

	if (name == NULL || strlen(name) > VM_MAX_NAMELEN)
		return (EINVAL);

	if ((flags & ~VMMCTL_FLAGS_MASK) != 0)
		return (EINVAL);
	error = devfs_get_cdevpriv((void **)&priv);
	if (error)
		return (error);

	sx_xlock(&vmmdev_mtx);
	sc = vmmdev_lookup(name, cred);
	if (sc != NULL) {
		sx_xunlock(&vmmdev_mtx);
		return (EEXIST);
	}

	/*
	 * Unprivileged users can only create VMs that will be automatically
	 * destroyed when the creating descriptor is closed.
	 */
	if ((flags & VMMCTL_CREATE_DESTROY_ON_CLOSE) == 0 &&
	    (error = priv_check_cred(cred, PRIV_VMM_CREATE)) != 0) {
		sx_xunlock(&vmmdev_mtx);
		return (EXTERROR(error,
		    "An unprivileged user must run VMs in monitor mode"));
	}

	if (!chgvmmcnt(cred->cr_ruidinfo, 1, vm_maxvmms)) {
		sx_xunlock(&vmmdev_mtx);
		return (ENOMEM);
	}

	error = vm_create(name, &vm);
	if (error != 0) {
		sx_xunlock(&vmmdev_mtx);
		(void)chgvmmcnt(cred->cr_ruidinfo, -1, 0);
		return (error);
	}
	sc = vmmdev_alloc(vm, cred);
	SLIST_INSERT_HEAD(&head, sc, link);
	sc->flags = flags;
	if ((flags & VMMCTL_CREATE_DESTROY_ON_CLOSE) != 0)
		LIST_INSERT_HEAD(&priv->softcs, sc, priv_link);

	make_dev_args_init(&mda);
	mda.mda_devsw = &vmmdevsw;
	mda.mda_cr = sc->ucred;
	mda.mda_uid = cred->cr_uid;
	mda.mda_gid = GID_VMM;
	mda.mda_mode = 0600;
	mda.mda_si_drv1 = sc;
	mda.mda_flags = MAKEDEV_CHECKNAME | MAKEDEV_WAITOK;
	error = make_dev_s(&mda, &cdev, "vmm/%s", name);
	if (error != 0) {
		sx_xunlock(&vmmdev_mtx);
		vmmdev_destroy(sc);
		return (error);
	}
	sc->cdev = cdev;
	sx_xunlock(&vmmdev_mtx);
	return (0);
}

static int
sysctl_vmm_create(SYSCTL_HANDLER_ARGS)
{
	char *buf;
	int error, buflen;

	if (!vmm_initialized)
		return (ENXIO);

	error = vmm_priv_check(req->td->td_ucred);
	if (error != 0)
		return (error);

	buflen = VM_MAX_NAMELEN + 1;
	buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
	error = sysctl_handle_string(oidp, buf, buflen, req);
	if (error == 0 && req->newptr != NULL)
		error = vmmdev_create(buf, 0, req->td->td_ucred);
	free(buf, M_VMMDEV);
	return (error);
}
SYSCTL_PROC(_hw_vmm, OID_AUTO, create,
    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
    NULL, 0, sysctl_vmm_create, "A",
    "Create a vmm(4) instance (legacy interface)");

static void
vmmctl_dtor(void *arg)
{
	struct cdev *sc_cdev;
	struct vmmdev_softc *sc;
	struct vmmctl_priv *priv = arg;

	/*
	 * Scan the softc list for any VMs associated with
	 * the current descriptor and destroy them.
	 */
	sx_xlock(&vmmdev_mtx);
	while (!LIST_EMPTY(&priv->softcs)) {
		sc = LIST_FIRST(&priv->softcs);
		sc_cdev = sc->cdev;
		if (sc_cdev != NULL) {
			sc->cdev = NULL;
		} else {
			/*
			 * Another thread has already
			 * started the removal process.
			 * Sleep until 'vmmdev_destroy' notifies us
			 * that the removal has finished.
			 */
			sx_sleep(sc, &vmmdev_mtx, 0, "vmmctl_dtor", 0);
			continue;
		}
		/*
		 * Temporarily drop the lock to allow vmmdev_destroy to run.
		 */
		sx_xunlock(&vmmdev_mtx);
		(void)vm_suspend(sc->vm, VM_SUSPEND_DESTROY);
		destroy_dev(sc_cdev);
		/* vmmdev_destroy will unlink the 'priv_link' entry. */
		vmmdev_destroy(sc);
		sx_xlock(&vmmdev_mtx);
	}
	sx_xunlock(&vmmdev_mtx);

	free(priv, M_VMMDEV);
}

static int
vmmctl_open(struct cdev *cdev, int flags, int fmt, struct thread *td)
{
	int error;
	struct vmmctl_priv *priv;

	error = vmm_priv_check(td->td_ucred);
	if (error != 0)
		return (error);

	if ((flags & FWRITE) == 0)
		return (EPERM);

	priv = malloc(sizeof(*priv), M_VMMDEV, M_WAITOK | M_ZERO);
	LIST_INIT(&priv->softcs);
	error = devfs_set_cdevpriv(priv, vmmctl_dtor);
	if (error != 0) {
		free(priv, M_VMMDEV);
		return (error);
	}

	return (0);
}

static int
vmmctl_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
    struct thread *td)
{
	int error;

	switch (cmd) {
	case VMMCTL_VM_CREATE: {
		struct vmmctl_vm_create *vmc;

		vmc = (struct vmmctl_vm_create *)data;
		vmc->name[VM_MAX_NAMELEN] = '\0';
		for (size_t i = 0; i < nitems(vmc->reserved); i++) {
			if (vmc->reserved[i] != 0) {
				error = EINVAL;
				return (error);
			}
		}

		error = vmmdev_create(vmc->name, vmc->flags, td->td_ucred);
		break;
	}
	case VMMCTL_VM_DESTROY: {
		struct vmmctl_vm_destroy *vmd;

		vmd = (struct vmmctl_vm_destroy *)data;
		vmd->name[VM_MAX_NAMELEN] = '\0';
		for (size_t i = 0; i < nitems(vmd->reserved); i++) {
			if (vmd->reserved[i] != 0) {
				error = EINVAL;
				return (error);
			}
		}

		error = vmmdev_lookup_and_destroy(vmd->name, td->td_ucred);
		break;
	}
	default:
		error = ENOTTY;
		break;
	}

	return (error);
}

static struct cdev *vmmctl_cdev;
static struct cdevsw vmmctlsw = {
	.d_name = "vmmctl",
	.d_version = D_VERSION,
	.d_open = vmmctl_open,
	.d_ioctl = vmmctl_ioctl,
};
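
/*
 * A minimal userspace sketch of the vmmctl interface (the VM name is
 * hypothetical; error handling omitted):
 *
 *	struct vmmctl_vm_create vmc;
 *	int fd;
 *
 *	fd = open("/dev/vmmctl", O_RDWR);
 *	memset(&vmc, 0, sizeof(vmc));
 *	strlcpy(vmc.name, "testvm", sizeof(vmc.name));
 *	vmc.flags = VMMCTL_CREATE_DESTROY_ON_CLOSE;
 *	ioctl(fd, VMMCTL_VM_CREATE, &vmc);
 *
 * With VMMCTL_CREATE_DESTROY_ON_CLOSE set, closing the descriptor tears
 * the VM down via vmmctl_dtor(); without it, PRIV_VMM_CREATE is required.
 */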

static int
vmmdev_init(void)
{
	int error;

	sx_xlock(&vmmdev_mtx);
	error = make_dev_p(MAKEDEV_CHECKNAME, &vmmctl_cdev, &vmmctlsw, NULL,
	    UID_ROOT, GID_VMM, 0660, "vmmctl");
	if (error == 0) {
		pr_allow_vmm_flag = prison_add_allow(NULL, "vmm", NULL,
		    "Allow use of vmm in a jail");
		pr_allow_vmm_ppt_flag = prison_add_allow(NULL, "vmm_ppt", NULL,
		    "Allow use of vmm with ppt devices in a jail");
	}
	sx_xunlock(&vmmdev_mtx);

	return (error);
}
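
/*
 * The "vmm" and "vmm_ppt" permissions registered above surface as jail
 * parameters; for example (a hedged sketch of a typical invocation):
 *
 *	jail -c name=vmjail path=/ allow.vmm persist
 *
 * permits processes inside the jail to pass the vmm_priv_check() calls
 * made throughout this file.
 */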

static int
vmmdev_cleanup(void)
{
	sx_xlock(&vmmdev_mtx);
	if (!SLIST_EMPTY(&head)) {
		sx_xunlock(&vmmdev_mtx);
		return (EBUSY);
	}
	if (vmmctl_cdev != NULL) {
		destroy_dev(vmmctl_cdev);
		vmmctl_cdev = NULL;
	}
	sx_xunlock(&vmmdev_mtx);

	return (0);
}

static int
vmm_handler(module_t mod, int what, void *arg)
{
	int error;

	switch (what) {
	case MOD_LOAD:
		error = vmmdev_init();
		if (error != 0)
			break;

		vm_maxcpu = mp_ncpus;
		TUNABLE_INT_FETCH("hw.vmm.maxcpu", &vm_maxcpu);
		if (vm_maxcpu > VM_MAXCPU) {
			printf("vmm: vm_maxcpu clamped to %u\n", VM_MAXCPU);
			vm_maxcpu = VM_MAXCPU;
		}
		if (vm_maxcpu == 0)
			vm_maxcpu = 1;
		vm_maxvmms = 4 * mp_ncpus;
		error = vmm_modinit();
		if (error == 0)
			vmm_initialized = true;
		else {
			int error1 __diagused;

			error1 = vmmdev_cleanup();
			KASSERT(error1 == 0,
			    ("%s: vmmdev_cleanup failed: %d", __func__,
			    error1));
		}
		break;
	case MOD_UNLOAD:
		error = vmmdev_cleanup();
		if (error == 0 && vmm_initialized) {
			error = vmm_modcleanup();
			if (error) {
				/*
				 * Something bad happened - prevent new
				 * VMs from being created
				 */
				vmm_initialized = false;
			}
		}
		break;
	default:
		error = 0;
		break;
	}
	return (error);
}

static moduledata_t vmm_kmod = {
	"vmm",
	vmm_handler,
	NULL
};

/*
 * vmm initialization has the following dependencies:
 *
 * - Initialization requires smp_rendezvous() and therefore must happen
 *   after SMP is fully functional (after SI_SUB_SMP).
 * - vmm device initialization requires an initialized devfs.
 */
DECLARE_MODULE(vmm, vmm_kmod, MAX(SI_SUB_SMP, SI_SUB_DEVFS) + 1, SI_ORDER_ANY);
MODULE_VERSION(vmm, 1);

static int
devmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t len,
    struct vm_object **objp, int nprot)
{
	struct devmem_softc *dsc;
	vm_ooffset_t first, last;
	size_t seglen;
	int error;
	bool sysmem;

	dsc = cdev->si_drv1;
	if (dsc == NULL) {
		/* 'cdev' has been created but is not ready for use */
		return (ENXIO);
	}

	first = *offset;
	last = *offset + len;
	if ((nprot & PROT_EXEC) || first < 0 || first >= last)
		return (EINVAL);

	vm_slock_memsegs(dsc->sc->vm);

	error = vm_get_memseg(dsc->sc->vm, dsc->segid, &seglen, &sysmem, objp);
	KASSERT(error == 0 && !sysmem && *objp != NULL,
	    ("%s: invalid devmem segment %d", __func__, dsc->segid));

	if (seglen >= last)
		vm_object_reference(*objp);
	else
		error = EINVAL;

	vm_unlock_memsegs(dsc->sc->vm);
	return (error);
}

static struct cdevsw devmemsw = {
	.d_name = "devmem",
	.d_version = D_VERSION,
	.d_mmap_single = devmem_mmap_single,
};

static int
devmem_create_cdev(struct vmmdev_softc *sc, int segid, char *devname)
{
	struct make_dev_args mda;
	struct devmem_softc *dsc;
	int error;

	sx_xlock(&vmmdev_mtx);

	dsc = malloc(sizeof(struct devmem_softc), M_VMMDEV, M_WAITOK | M_ZERO);
	dsc->segid = segid;
	dsc->name = devname;
	dsc->sc = sc;
	SLIST_INSERT_HEAD(&sc->devmem, dsc, link);

	make_dev_args_init(&mda);
	mda.mda_devsw = &devmemsw;
	mda.mda_cr = sc->ucred;
	mda.mda_uid = sc->ucred->cr_uid;
	mda.mda_gid = GID_VMM;
	mda.mda_mode = 0600;
	mda.mda_si_drv1 = dsc;
	mda.mda_flags = MAKEDEV_CHECKNAME | MAKEDEV_WAITOK;
	error = make_dev_s(&mda, &dsc->cdev, "vmm.io/%s.%s", vm_name(sc->vm),
	    devname);
	if (error != 0) {
		SLIST_REMOVE(&sc->devmem, dsc, devmem_softc, link);
		free(dsc->name, M_VMMDEV);
		free(dsc, M_VMMDEV);
	}

	sx_xunlock(&vmmdev_mtx);

	return (error);
}
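
/*
 * A userspace sketch of mapping a device memory segment (VM and segment
 * names are hypothetical; error handling omitted):
 *
 *	int fd = open("/dev/vmm.io/testvm.bootrom", O_RDWR);
 *	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED,
 *	    fd, 0);
 *
 * devmem_mmap_single() rejects PROT_EXEC and any range that extends past
 * the end of the segment.
 */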

static void
devmem_destroy(void *arg)
{
	struct devmem_softc *dsc = arg;

	destroy_dev(dsc->cdev);
	dsc->cdev = NULL;
	dsc->sc = NULL;
}