xref: /cloud-hypervisor/virtio-devices/src/iommu.rs (revision f67b3f79ea19c9a66e04074cbbf5d292f6529e43)
// Copyright © 2019 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause

use super::Error as DeviceError;
use super::{
    ActivateResult, DescriptorChain, EpollHelper, EpollHelperError, EpollHelperHandler, Queue,
    VirtioCommon, VirtioDevice, VirtioDeviceType, EPOLL_HELPER_EVENT_LAST, VIRTIO_F_VERSION_1,
};
use crate::seccomp_filters::Thread;
use crate::thread_helper::spawn_virtio_thread;
use crate::GuestMemoryMmap;
use crate::{DmaRemapping, VirtioInterrupt, VirtioInterruptType};
use seccompiler::SeccompAction;
use std::collections::BTreeMap;
use std::fmt::{self, Display};
use std::io;
use std::mem::size_of;
use std::ops::Bound::Included;
use std::os::unix::io::AsRawFd;
use std::result;
use std::sync::atomic::AtomicBool;
use std::sync::{Arc, Barrier, RwLock};
use versionize::{VersionMap, Versionize, VersionizeResult};
use versionize_derive::Versionize;
use vm_device::dma_mapping::ExternalDmaMapping;
use vm_memory::{
    Address, ByteValued, Bytes, GuestAddress, GuestAddressSpace, GuestMemoryAtomic,
    GuestMemoryError,
};
use vm_migration::VersionMapped;
use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable};
use vmm_sys_util::eventfd::EventFd;

/// Queue sizes
const QUEUE_SIZE: u16 = 256;
const NUM_QUEUES: usize = 2;
const QUEUE_SIZES: &[u16] = &[QUEUE_SIZE; NUM_QUEUES];

/// New descriptors are pending on the request queue.
/// "requestq" is used whenever an action must be performed on behalf of
/// the guest driver.
const REQUEST_Q_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 1;
/// New descriptors are pending on the event queue.
/// "eventq" lets the device report faults and other asynchronous events to
/// the guest driver.
const EVENT_Q_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 2;

/// PROBE properties size.
/// This is the minimal size needed to provide at least one RESV_MEM property.
/// Because virtio-iommu expects one MSI reserved region, we must provide it,
/// otherwise the guest driver falls back to a predefined region between
/// 0x8000000 and 0x80FFFFF, which is only relevant on the ARM architecture
/// but conflicts with x86.
const PROBE_PROP_SIZE: u32 =
    (size_of::<VirtioIommuProbeProperty>() + size_of::<VirtioIommuProbeResvMem>()) as u32;
const MSI_IOVA_START: u64 = 0xfee0_0000;
const MSI_IOVA_END: u64 = 0xfeef_ffff;
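
// Background: on x86, message-signaled interrupts are written to the
// 0xfee0_0000..=0xfeef_ffff doorbell window (the local APIC address range),
// which is exactly the 1 MiB region advertised in the PROBE reply below as
// the reserved MSI region.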

/// Virtio IOMMU features
#[allow(unused)]
const VIRTIO_IOMMU_F_INPUT_RANGE: u32 = 0;
#[allow(unused)]
const VIRTIO_IOMMU_F_DOMAIN_RANGE: u32 = 1;
const VIRTIO_IOMMU_F_MAP_UNMAP: u32 = 2;
#[allow(unused)]
const VIRTIO_IOMMU_F_BYPASS: u32 = 3;
const VIRTIO_IOMMU_F_PROBE: u32 = 4;
#[allow(unused)]
const VIRTIO_IOMMU_F_MMIO: u32 = 5;
#[allow(unused)]
const VIRTIO_IOMMU_F_BYPASS_CONFIG: u32 = 6;

// Support 2MiB and 4KiB page sizes.
const VIRTIO_IOMMU_PAGE_SIZE_MASK: u64 = (2 << 20) | (4 << 10);
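
// Illustrative sketch (not part of the original file): the mask sets a bit
// at each supported page size, so 4 KiB contributes bit 12 and 2 MiB
// contributes bit 21.
#[cfg(test)]
mod page_size_mask_tests {
    use super::VIRTIO_IOMMU_PAGE_SIZE_MASK;

    #[test]
    fn mask_encodes_4kib_and_2mib() {
        assert_eq!(VIRTIO_IOMMU_PAGE_SIZE_MASK, (1u64 << 21) | (1u64 << 12));
    }
}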

#[derive(Copy, Clone, Debug, Default)]
#[repr(packed)]
struct VirtioIommuRange32 {
    start: u32,
    end: u32,
}

unsafe impl ByteValued for VirtioIommuRange32 {}

#[derive(Copy, Clone, Debug, Default)]
#[repr(packed)]
struct VirtioIommuRange64 {
    start: u64,
    end: u64,
}

unsafe impl ByteValued for VirtioIommuRange64 {}

#[derive(Copy, Clone, Debug, Default)]
#[repr(packed)]
struct VirtioIommuConfig {
    page_size_mask: u64,
    input_range: VirtioIommuRange64,
    domain_range: VirtioIommuRange32,
    probe_size: u32,
    bypass: u8,
    reserved: [u8; 7],
}

unsafe impl ByteValued for VirtioIommuConfig {}
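
// Sanity sketch for the packed layout above: 8 (page_size_mask) + 16
// (input_range) + 8 (domain_range) + 4 (probe_size) + 1 (bypass) + 7
// (reserved) = 44 bytes, matching struct virtio_iommu_config in the spec.
#[cfg(test)]
mod config_layout_tests {
    use super::*;

    #[test]
    fn config_is_44_bytes() {
        assert_eq!(size_of::<VirtioIommuConfig>(), 44);
    }
}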

/// Virtio IOMMU request type
const VIRTIO_IOMMU_T_ATTACH: u8 = 1;
const VIRTIO_IOMMU_T_DETACH: u8 = 2;
const VIRTIO_IOMMU_T_MAP: u8 = 3;
const VIRTIO_IOMMU_T_UNMAP: u8 = 4;
const VIRTIO_IOMMU_T_PROBE: u8 = 5;

#[derive(Copy, Clone, Debug, Default)]
#[repr(packed)]
struct VirtioIommuReqHead {
    type_: u8,
    reserved: [u8; 3],
}

unsafe impl ByteValued for VirtioIommuReqHead {}

/// Virtio IOMMU request status
const VIRTIO_IOMMU_S_OK: u8 = 0;
#[allow(unused)]
const VIRTIO_IOMMU_S_IOERR: u8 = 1;
#[allow(unused)]
const VIRTIO_IOMMU_S_UNSUPP: u8 = 2;
#[allow(unused)]
const VIRTIO_IOMMU_S_DEVERR: u8 = 3;
#[allow(unused)]
const VIRTIO_IOMMU_S_INVAL: u8 = 4;
#[allow(unused)]
const VIRTIO_IOMMU_S_RANGE: u8 = 5;
#[allow(unused)]
const VIRTIO_IOMMU_S_NOENT: u8 = 6;
#[allow(unused)]
const VIRTIO_IOMMU_S_FAULT: u8 = 7;
#[allow(unused)]
const VIRTIO_IOMMU_S_NOMEM: u8 = 8;

#[derive(Copy, Clone, Debug, Default)]
#[repr(packed)]
struct VirtioIommuReqTail {
    status: u8,
    reserved: [u8; 3],
}

unsafe impl ByteValued for VirtioIommuReqTail {}

/// ATTACH request
#[derive(Copy, Clone, Debug, Default)]
#[repr(packed)]
struct VirtioIommuReqAttach {
    domain: u32,
    endpoint: u32,
    reserved: [u8; 8],
}

unsafe impl ByteValued for VirtioIommuReqAttach {}

/// DETACH request
#[derive(Copy, Clone, Debug, Default)]
#[repr(packed)]
struct VirtioIommuReqDetach {
    domain: u32,
    endpoint: u32,
    reserved: [u8; 8],
}

unsafe impl ByteValued for VirtioIommuReqDetach {}

/// Virtio IOMMU request MAP flags
#[allow(unused)]
const VIRTIO_IOMMU_MAP_F_READ: u32 = 1;
#[allow(unused)]
const VIRTIO_IOMMU_MAP_F_WRITE: u32 = 1 << 1;
#[allow(unused)]
const VIRTIO_IOMMU_MAP_F_MMIO: u32 = 1 << 2;
#[allow(unused)]
const VIRTIO_IOMMU_MAP_F_MASK: u32 =
    VIRTIO_IOMMU_MAP_F_READ | VIRTIO_IOMMU_MAP_F_WRITE | VIRTIO_IOMMU_MAP_F_MMIO;

/// MAP request
#[derive(Copy, Clone, Debug, Default)]
#[repr(packed)]
struct VirtioIommuReqMap {
    domain: u32,
    virt_start: u64,
    virt_end: u64,
    phys_start: u64,
    flags: u32,
}

unsafe impl ByteValued for VirtioIommuReqMap {}
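
// Illustrative sketch: the exact bytes a guest places after the request
// header for a MAP. `ByteValued::as_slice` exposes the packed layout:
// 4 (domain) + 8 + 8 + 8 (virt_start/virt_end/phys_start) + 4 (flags) = 32.
#[cfg(test)]
mod map_request_layout_tests {
    use super::*;

    #[test]
    fn map_request_is_32_bytes() {
        let req = VirtioIommuReqMap {
            domain: 1,
            virt_start: 0x1000,
            virt_end: 0x1fff,
            phys_start: 0x8000,
            flags: VIRTIO_IOMMU_MAP_F_READ | VIRTIO_IOMMU_MAP_F_WRITE,
        };
        assert_eq!(req.as_slice().len(), 32);
    }
}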

/// UNMAP request
#[derive(Copy, Clone, Debug, Default)]
#[repr(packed)]
struct VirtioIommuReqUnmap {
    domain: u32,
    virt_start: u64,
    virt_end: u64,
    reserved: [u8; 4],
}

unsafe impl ByteValued for VirtioIommuReqUnmap {}

/// Virtio IOMMU request PROBE types
#[allow(unused)]
const VIRTIO_IOMMU_PROBE_T_NONE: u16 = 0;
const VIRTIO_IOMMU_PROBE_T_RESV_MEM: u16 = 1;
#[allow(unused)]
const VIRTIO_IOMMU_PROBE_T_MASK: u16 = 0xfff;

/// PROBE request
#[derive(Copy, Clone, Debug, Default)]
#[repr(packed)]
struct VirtioIommuReqProbe {
    endpoint: u32,
    reserved: [u64; 8],
}

unsafe impl ByteValued for VirtioIommuReqProbe {}

#[derive(Copy, Clone, Debug, Default)]
#[repr(packed)]
struct VirtioIommuProbeProperty {
    type_: u16,
    length: u16,
}

unsafe impl ByteValued for VirtioIommuProbeProperty {}

/// Virtio IOMMU request PROBE property RESV_MEM subtypes
#[allow(unused)]
const VIRTIO_IOMMU_RESV_MEM_T_RESERVED: u8 = 0;
const VIRTIO_IOMMU_RESV_MEM_T_MSI: u8 = 1;

#[derive(Copy, Clone, Debug, Default)]
#[repr(packed)]
struct VirtioIommuProbeResvMem {
    subtype: u8,
    reserved: [u8; 3],
    start: u64,
    end: u64,
}

unsafe impl ByteValued for VirtioIommuProbeResvMem {}
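
// Sanity sketch: PROBE_PROP_SIZE is one 4-byte property header followed by
// one 20-byte RESV_MEM payload (1 + 3 + 8 + 8), i.e. 24 bytes total.
#[cfg(test)]
mod probe_prop_size_tests {
    use super::*;

    #[test]
    fn probe_prop_size_matches_layout() {
        assert_eq!(size_of::<VirtioIommuProbeProperty>(), 4);
        assert_eq!(size_of::<VirtioIommuProbeResvMem>(), 20);
        assert_eq!(PROBE_PROP_SIZE, 24);
    }
}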

/// Virtio IOMMU fault flags
#[allow(unused)]
const VIRTIO_IOMMU_FAULT_F_READ: u32 = 1;
#[allow(unused)]
const VIRTIO_IOMMU_FAULT_F_WRITE: u32 = 1 << 1;
#[allow(unused)]
const VIRTIO_IOMMU_FAULT_F_EXEC: u32 = 1 << 2;
#[allow(unused)]
const VIRTIO_IOMMU_FAULT_F_ADDRESS: u32 = 1 << 8;

/// Virtio IOMMU fault reasons
#[allow(unused)]
const VIRTIO_IOMMU_FAULT_R_UNKNOWN: u32 = 0;
#[allow(unused)]
const VIRTIO_IOMMU_FAULT_R_DOMAIN: u32 = 1;
#[allow(unused)]
const VIRTIO_IOMMU_FAULT_R_MAPPING: u32 = 2;

/// Fault reporting through eventq
#[allow(unused)]
#[derive(Copy, Clone, Debug, Default)]
#[repr(packed)]
struct VirtioIommuFault {
    reason: u8,
    reserved: [u8; 3],
    flags: u32,
    endpoint: u32,
    reserved2: [u8; 4],
    address: u64,
}

unsafe impl ByteValued for VirtioIommuFault {}

#[derive(Debug)]
enum Error {
    /// Guest gave us bad memory addresses.
    GuestMemory(GuestMemoryError),
    /// Guest gave us a write-only descriptor that the protocol says to read from.
    UnexpectedWriteOnlyDescriptor,
    /// Guest gave us a read-only descriptor that the protocol says to write to.
    UnexpectedReadOnlyDescriptor,
    /// Guest gave us too few descriptors in a descriptor chain.
    DescriptorChainTooShort,
    /// Guest gave us a buffer that was too short to use.
    BufferLengthTooSmall,
    /// Guest sent us an invalid request.
    InvalidRequest,
    /// Guest sent us an invalid ATTACH request.
    InvalidAttachRequest,
    /// Guest sent us an invalid DETACH request.
    InvalidDetachRequest,
    /// Guest sent us an invalid MAP request.
    InvalidMapRequest,
    /// Guest sent us an invalid UNMAP request.
    InvalidUnmapRequest,
    /// Guest sent us an invalid PROBE request.
    InvalidProbeRequest,
    /// Failed performing an external mapping.
    ExternalMapping(io::Error),
    /// Failed performing an external unmapping.
    ExternalUnmapping(io::Error),
}

impl Display for Error {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        use self::Error::*;

        match self {
            BufferLengthTooSmall => write!(f, "buffer length too small"),
            DescriptorChainTooShort => write!(f, "descriptor chain too short"),
            GuestMemory(e) => write!(f, "bad guest memory address: {}", e),
            InvalidRequest => write!(f, "invalid request"),
            InvalidAttachRequest => write!(f, "invalid attach request"),
            InvalidDetachRequest => write!(f, "invalid detach request"),
            InvalidMapRequest => write!(f, "invalid map request"),
            InvalidUnmapRequest => write!(f, "invalid unmap request"),
            InvalidProbeRequest => write!(f, "invalid probe request"),
            UnexpectedReadOnlyDescriptor => write!(f, "unexpected read-only descriptor"),
            UnexpectedWriteOnlyDescriptor => write!(f, "unexpected write-only descriptor"),
            ExternalMapping(e) => write!(f, "failed performing external mapping: {}", e),
            ExternalUnmapping(e) => write!(f, "failed performing external unmapping: {}", e),
        }
    }
}

struct Request {}

impl Request {
    // Parse the available vring buffer. Based on the map of external
    // mappings required by devices such as VFIO or vhost-user ones, this
    // function might update the per-domain map of external mappings.
    // The VMM knows the device_id <=> mapping relationship before the VM
    // runs, but the domain <=> mapping map is only built at runtime, from
    // the information the guest virtio-iommu driver provides (which gives
    // the device_id <=> domain link).
    fn parse(
        avail_desc: &DescriptorChain,
        mem: &GuestMemoryMmap,
        mapping: &Arc<IommuMapping>,
        ext_mapping: &BTreeMap<u32, Arc<dyn ExternalDmaMapping>>,
        ext_domain_mapping: &mut BTreeMap<u32, Arc<dyn ExternalDmaMapping>>,
    ) -> result::Result<usize, Error> {
        // The head contains the request type which MUST be readable.
        if avail_desc.is_write_only() {
            return Err(Error::UnexpectedWriteOnlyDescriptor);
        }

        if (avail_desc.len as usize) < size_of::<VirtioIommuReqHead>() {
            return Err(Error::InvalidRequest);
        }

        let req_head: VirtioIommuReqHead =
            mem.read_obj(avail_desc.addr).map_err(Error::GuestMemory)?;
        let req_offset = size_of::<VirtioIommuReqHead>();
        let desc_size_left = (avail_desc.len as usize) - req_offset;
        let req_addr = if let Some(addr) = avail_desc.addr.checked_add(req_offset as u64) {
            addr
        } else {
            return Err(Error::InvalidRequest);
        };

        // Create the reply
        let mut reply: Vec<u8> = Vec::new();

        let hdr_len = match req_head.type_ {
            VIRTIO_IOMMU_T_ATTACH => {
                if desc_size_left != size_of::<VirtioIommuReqAttach>() {
                    return Err(Error::InvalidAttachRequest);
                }

                let req: VirtioIommuReqAttach = mem
                    .read_obj(req_addr)
                    .map_err(Error::GuestMemory)?;
                debug!("Attach request {:?}", req);

                // Copy the values to avoid referencing fields of a packed struct.
                let domain = req.domain;
                let endpoint = req.endpoint;

                // Add the endpoint associated with a specific domain.
                mapping.endpoints.write().unwrap().insert(endpoint, domain);

                // If the endpoint is part of the list of devices with an
                // external mapping, insert a new entry for the corresponding
                // domain, with the same reference to the trait.
                if let Some(map) = ext_mapping.get(&endpoint) {
                    ext_domain_mapping.insert(domain, map.clone());
                }

                // Add a new domain with no mapping if the entry didn't exist yet.
                let mut mappings = mapping.mappings.write().unwrap();
                mappings.entry(domain).or_insert_with(BTreeMap::new);

                0
            }
            VIRTIO_IOMMU_T_DETACH => {
                if desc_size_left != size_of::<VirtioIommuReqDetach>() {
                    return Err(Error::InvalidDetachRequest);
                }

                let req: VirtioIommuReqDetach = mem
                    .read_obj(req_addr)
                    .map_err(Error::GuestMemory)?;
                debug!("Detach request {:?}", req);

                // Copy the values to avoid referencing fields of a packed struct.
                let domain = req.domain;
                let endpoint = req.endpoint;

                // If the endpoint is part of the list of devices with an
                // external mapping, remove the entry for the corresponding
                // domain.
                if ext_mapping.contains_key(&endpoint) {
                    ext_domain_mapping.remove(&domain);
                }

                // Remove the endpoint and its domain association.
                mapping.endpoints.write().unwrap().remove(&endpoint);

                0
            }
            VIRTIO_IOMMU_T_MAP => {
                if desc_size_left != size_of::<VirtioIommuReqMap>() {
                    return Err(Error::InvalidMapRequest);
                }

                let req: VirtioIommuReqMap = mem
                    .read_obj(req_addr)
                    .map_err(Error::GuestMemory)?;
                debug!("Map request {:?}", req);

                // Copy the value to avoid referencing a field of a packed struct.
                let domain = req.domain;

                // Trigger external mapping if necessary.
                if let Some(ext_map) = ext_domain_mapping.get(&domain) {
                    let size = req.virt_end - req.virt_start + 1;
                    ext_map
                        .map(req.virt_start, req.phys_start, size)
                        .map_err(Error::ExternalMapping)?;
                }

                // Add the new mapping to the domain.
                if let Some(entry) = mapping.mappings.write().unwrap().get_mut(&domain) {
                    entry.insert(
                        req.virt_start,
                        Mapping {
                            gpa: req.phys_start,
                            size: req.virt_end - req.virt_start + 1,
                        },
                    );
                } else {
                    return Err(Error::InvalidMapRequest);
                }

                0
            }
            VIRTIO_IOMMU_T_UNMAP => {
                if desc_size_left != size_of::<VirtioIommuReqUnmap>() {
                    return Err(Error::InvalidUnmapRequest);
                }

                let req: VirtioIommuReqUnmap = mem
                    .read_obj(req_addr)
                    .map_err(Error::GuestMemory)?;
                debug!("Unmap request {:?}", req);

                // Copy the values to avoid referencing fields of a packed struct.
                let domain = req.domain;
                let virt_start = req.virt_start;

                // Trigger external unmapping if necessary.
                if let Some(ext_map) = ext_domain_mapping.get(&domain) {
                    let size = req.virt_end - virt_start + 1;
                    ext_map
                        .unmap(virt_start, size)
                        .map_err(Error::ExternalUnmapping)?;
                }

                // Remove the mapping associated with the domain.
                if let Some(entry) = mapping.mappings.write().unwrap().get_mut(&domain) {
                    entry.remove(&virt_start);
                }

                0
            }
            VIRTIO_IOMMU_T_PROBE => {
                if desc_size_left != size_of::<VirtioIommuReqProbe>() {
                    return Err(Error::InvalidProbeRequest);
                }

                let req: VirtioIommuReqProbe = mem
                    .read_obj(req_addr)
                    .map_err(Error::GuestMemory)?;
                debug!("Probe request {:?}", req);

                let probe_prop = VirtioIommuProbeProperty {
                    type_: VIRTIO_IOMMU_PROBE_T_RESV_MEM,
                    length: size_of::<VirtioIommuProbeResvMem>() as u16,
                };
                reply.extend_from_slice(probe_prop.as_slice());

                let resv_mem = VirtioIommuProbeResvMem {
                    subtype: VIRTIO_IOMMU_RESV_MEM_T_MSI,
                    start: MSI_IOVA_START,
                    end: MSI_IOVA_END,
                    ..Default::default()
                };
                reply.extend_from_slice(resv_mem.as_slice());

                PROBE_PROP_SIZE
            }
            _ => return Err(Error::InvalidRequest),
        };

        let status_desc = avail_desc
            .next_descriptor()
            .ok_or(Error::DescriptorChainTooShort)?;

        // The status MUST always be writable
        if !status_desc.is_write_only() {
            return Err(Error::UnexpectedReadOnlyDescriptor);
        }

        if status_desc.len < hdr_len + size_of::<VirtioIommuReqTail>() as u32 {
            return Err(Error::BufferLengthTooSmall);
        }

        let tail = VirtioIommuReqTail {
            status: VIRTIO_IOMMU_S_OK,
            ..Default::default()
        };
        reply.extend_from_slice(tail.as_slice());

        mem.write_slice(reply.as_slice(), status_desc.addr)
            .map_err(Error::GuestMemory)?;

        Ok((hdr_len as usize) + size_of::<VirtioIommuReqTail>())
    }
}
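
// Request framing, as handled by Request::parse above (summary comment):
//
//   descriptor 0 (device-readable):  VirtioIommuReqHead | type-specific body
//   descriptor 1 (device-writable):  [PROBE properties] | VirtioIommuReqTail
//
// Only PROBE returns properties ahead of the tail; every other request is
// answered with just the 4-byte tail carrying the status code.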

struct IommuEpollHandler {
    queues: Vec<Queue>,
    mem: GuestMemoryAtomic<GuestMemoryMmap>,
    interrupt_cb: Arc<dyn VirtioInterrupt>,
    queue_evts: Vec<EventFd>,
    kill_evt: EventFd,
    pause_evt: EventFd,
    mapping: Arc<IommuMapping>,
    ext_mapping: BTreeMap<u32, Arc<dyn ExternalDmaMapping>>,
    ext_domain_mapping: BTreeMap<u32, Arc<dyn ExternalDmaMapping>>,
}

impl IommuEpollHandler {
    fn request_queue(&mut self) -> bool {
        let mut used_desc_heads = [(0, 0); QUEUE_SIZE as usize];
        let mut used_count = 0;
        let mem = self.mem.memory();
        for avail_desc in self.queues[0].iter(&mem) {
            let len = match Request::parse(
                &avail_desc,
                &mem,
                &self.mapping,
                &self.ext_mapping,
                &mut self.ext_domain_mapping,
            ) {
                Ok(len) => len as u32,
                Err(e) => {
                    error!("failed parsing descriptor: {}", e);
                    0
                }
            };

            used_desc_heads[used_count] = (avail_desc.index, len);
            used_count += 1;
        }

        for &(desc_index, len) in &used_desc_heads[..used_count] {
            self.queues[0].add_used(&mem, desc_index, len);
        }
        used_count > 0
    }

    fn event_queue(&mut self) -> bool {
        false
    }

    fn signal_used_queue(&self, queue: &Queue) -> result::Result<(), DeviceError> {
        self.interrupt_cb
            .trigger(&VirtioInterruptType::Queue, Some(queue))
            .map_err(|e| {
                error!("Failed to signal used queue: {:?}", e);
                DeviceError::FailedSignalingUsedQueue(e)
            })
    }

    fn run(
        &mut self,
        paused: Arc<AtomicBool>,
        paused_sync: Arc<Barrier>,
    ) -> result::Result<(), EpollHelperError> {
        let mut helper = EpollHelper::new(&self.kill_evt, &self.pause_evt)?;
        helper.add_event(self.queue_evts[0].as_raw_fd(), REQUEST_Q_EVENT)?;
        helper.add_event(self.queue_evts[1].as_raw_fd(), EVENT_Q_EVENT)?;
        helper.run(paused, paused_sync, self)?;

        Ok(())
    }
}

impl EpollHelperHandler for IommuEpollHandler {
    fn handle_event(&mut self, _helper: &mut EpollHelper, event: &epoll::Event) -> bool {
        let ev_type = event.data as u16;
        match ev_type {
            REQUEST_Q_EVENT => {
                if let Err(e) = self.queue_evts[0].read() {
                    error!("Failed to get queue event: {:?}", e);
                    return true;
                } else if self.request_queue() {
                    if let Err(e) = self.signal_used_queue(&self.queues[0]) {
                        error!("Failed to signal used queue: {:?}", e);
                        return true;
                    }
                }
            }
            EVENT_Q_EVENT => {
                if let Err(e) = self.queue_evts[1].read() {
                    error!("Failed to get queue event: {:?}", e);
                    return true;
                } else if self.event_queue() {
                    if let Err(e) = self.signal_used_queue(&self.queues[1]) {
                        error!("Failed to signal used queue: {:?}", e);
                        return true;
                    }
                }
            }
            _ => {
                error!("Unexpected event: {}", ev_type);
                return true;
            }
        }
        false
    }
}

#[derive(Clone, Copy, Versionize)]
struct Mapping {
    gpa: u64,
    size: u64,
}

pub struct IommuMapping {
    // Domain associated with each endpoint.
    endpoints: Arc<RwLock<BTreeMap<u32, u32>>>,
    // List of mappings per domain.
    mappings: Arc<RwLock<BTreeMap<u32, BTreeMap<u64, Mapping>>>>,
}

impl DmaRemapping for IommuMapping {
    fn translate(&self, id: u32, addr: u64) -> std::result::Result<u64, std::io::Error> {
        debug!("Translate addr 0x{:x}", addr);
        if let Some(domain) = self.endpoints.read().unwrap().get(&id) {
            if let Some(mapping) = self.mappings.read().unwrap().get(domain) {
                // Scan only the mappings whose start falls within one
                // page-size-mask below `addr`.
                let range_start = if VIRTIO_IOMMU_PAGE_SIZE_MASK > addr {
                    0
                } else {
                    addr - VIRTIO_IOMMU_PAGE_SIZE_MASK
                };
                for (&key, &value) in mapping.range((Included(&range_start), Included(&addr))) {
                    if addr >= key && addr < key + value.size {
                        let new_addr = addr - key + value.gpa;
                        debug!("Into new_addr 0x{:x}", new_addr);
                        return Ok(new_addr);
                    }
                }
            }
        }

        debug!("Into same addr...");
        Ok(addr)
    }
}
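
// Minimal illustrative sketch of the translation path: endpoint -> domain
// -> sorted IOVA map, with the identity fallback when nothing matches.
#[cfg(test)]
mod translate_tests {
    use super::*;

    #[test]
    fn translate_hit_and_identity_fallback() {
        let mapping = IommuMapping {
            endpoints: Arc::new(RwLock::new(BTreeMap::new())),
            mappings: Arc::new(RwLock::new(BTreeMap::new())),
        };

        // Attach endpoint 3 to domain 1, then map IOVA 0x1000..=0x1fff to
        // GPA 0x8000 inside that domain.
        mapping.endpoints.write().unwrap().insert(3, 1);
        let mut iova_map = BTreeMap::new();
        iova_map.insert(
            0x1000,
            Mapping {
                gpa: 0x8000,
                size: 0x1000,
            },
        );
        mapping.mappings.write().unwrap().insert(1, iova_map);

        // Inside the mapping: the offset within the range is preserved.
        assert_eq!(mapping.translate(3, 0x1800).unwrap(), 0x8800);
        // Unknown endpoint: translate() falls back to the identity address.
        assert_eq!(mapping.translate(9, 0x1800).unwrap(), 0x1800);
    }
}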

pub struct Iommu {
    common: VirtioCommon,
    id: String,
    config: VirtioIommuConfig,
    mapping: Arc<IommuMapping>,
    ext_mapping: BTreeMap<u32, Arc<dyn ExternalDmaMapping>>,
    seccomp_action: SeccompAction,
    exit_evt: EventFd,
}

#[derive(Versionize)]
struct IommuState {
    avail_features: u64,
    acked_features: u64,
    endpoints: Vec<(u32, u32)>,
    mappings: Vec<(u32, Vec<(u64, Mapping)>)>,
}

impl VersionMapped for IommuState {}

impl Iommu {
    pub fn new(
        id: String,
        seccomp_action: SeccompAction,
        exit_evt: EventFd,
    ) -> io::Result<(Self, Arc<IommuMapping>)> {
        let config = VirtioIommuConfig {
            page_size_mask: VIRTIO_IOMMU_PAGE_SIZE_MASK,
            probe_size: PROBE_PROP_SIZE,
            ..Default::default()
        };

        let mapping = Arc::new(IommuMapping {
            endpoints: Arc::new(RwLock::new(BTreeMap::new())),
            mappings: Arc::new(RwLock::new(BTreeMap::new())),
        });

        Ok((
            Iommu {
                id,
                common: VirtioCommon {
                    device_type: VirtioDeviceType::Iommu as u32,
                    queue_sizes: QUEUE_SIZES.to_vec(),
                    avail_features: 1u64 << VIRTIO_F_VERSION_1
                        | 1u64 << VIRTIO_IOMMU_F_MAP_UNMAP
                        | 1u64 << VIRTIO_IOMMU_F_PROBE,
                    paused_sync: Some(Arc::new(Barrier::new(2))),
                    ..Default::default()
                },
                config,
                mapping: mapping.clone(),
                ext_mapping: BTreeMap::new(),
                seccomp_action,
                exit_evt,
            },
            mapping,
        ))
    }

    fn state(&self) -> IommuState {
        IommuState {
            avail_features: self.common.avail_features,
            acked_features: self.common.acked_features,
            endpoints: self
                .mapping
                .endpoints
                .read()
                .unwrap()
                .clone()
                .into_iter()
                .collect(),
            mappings: self
                .mapping
                .mappings
                .read()
                .unwrap()
                .clone()
                .into_iter()
                .map(|(k, v)| (k, v.into_iter().collect()))
                .collect(),
        }
    }

    fn set_state(&mut self, state: &IommuState) {
        self.common.avail_features = state.avail_features;
        self.common.acked_features = state.acked_features;
        *(self.mapping.endpoints.write().unwrap()) = state.endpoints.clone().into_iter().collect();
        *(self.mapping.mappings.write().unwrap()) = state
            .mappings
            .clone()
            .into_iter()
            .map(|(k, v)| (k, v.into_iter().collect()))
            .collect();
    }

    pub fn add_external_mapping(&mut self, device_id: u32, mapping: Arc<dyn ExternalDmaMapping>) {
        self.ext_mapping.insert(device_id, mapping);
    }
}
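
// Illustrative sketch of wiring an external DMA mapping handler. The trait
// method signatures below are inferred from the call sites in Request::parse
// (map(iova, gpa, size) / unmap(iova, size)); treat this as a sketch rather
// than the canonical vm-device definition.
#[cfg(test)]
mod external_mapping_tests {
    use super::*;

    struct NoopMapping;

    impl ExternalDmaMapping for NoopMapping {
        fn map(&self, _iova: u64, _gpa: u64, _size: u64) -> io::Result<()> {
            Ok(())
        }

        fn unmap(&self, _iova: u64, _size: u64) -> io::Result<()> {
            Ok(())
        }
    }

    #[test]
    fn register_external_mapping() {
        let (mut iommu, _mapping) = Iommu::new(
            "_iommu0".to_string(),
            SeccompAction::Allow,
            EventFd::new(0).unwrap(),
        )
        .unwrap();

        // Device id 42 (hypothetical) now shares the no-op mapping handler.
        iommu.add_external_mapping(42, Arc::new(NoopMapping));
    }
}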

impl Drop for Iommu {
    fn drop(&mut self) {
        if let Some(kill_evt) = self.common.kill_evt.take() {
            // Ignore the result because there is nothing we can do about it.
            let _ = kill_evt.write(1);
        }
    }
}

impl VirtioDevice for Iommu {
    fn device_type(&self) -> u32 {
        self.common.device_type
    }

    fn queue_max_sizes(&self) -> &[u16] {
        &self.common.queue_sizes
    }

    fn features(&self) -> u64 {
        self.common.avail_features
    }

    fn ack_features(&mut self, value: u64) {
        self.common.ack_features(value)
    }

    fn read_config(&self, offset: u64, data: &mut [u8]) {
        self.read_config_from_slice(self.config.as_slice(), offset, data);
    }

    fn activate(
        &mut self,
        mem: GuestMemoryAtomic<GuestMemoryMmap>,
        interrupt_cb: Arc<dyn VirtioInterrupt>,
        queues: Vec<Queue>,
        queue_evts: Vec<EventFd>,
    ) -> ActivateResult {
        self.common.activate(&queues, &queue_evts, &interrupt_cb)?;
        let (kill_evt, pause_evt) = self.common.dup_eventfds();
        let mut handler = IommuEpollHandler {
            queues,
            mem,
            interrupt_cb,
            queue_evts,
            kill_evt,
            pause_evt,
            mapping: self.mapping.clone(),
            ext_mapping: self.ext_mapping.clone(),
            ext_domain_mapping: BTreeMap::new(),
        };

        let paused = self.common.paused.clone();
        let paused_sync = self.common.paused_sync.clone();
        let mut epoll_threads = Vec::new();
        spawn_virtio_thread(
            &self.id,
            &self.seccomp_action,
            Thread::VirtioIommu,
            &mut epoll_threads,
            &self.exit_evt,
            move || {
                if let Err(e) = handler.run(paused, paused_sync.unwrap()) {
                    error!("Error running worker: {:?}", e);
                }
            },
        )?;

        self.common.epoll_threads = Some(epoll_threads);

        event!("virtio-device", "activated", "id", &self.id);
        Ok(())
    }

    fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> {
        let result = self.common.reset();
        event!("virtio-device", "reset", "id", &self.id);
        result
    }
}

impl Pausable for Iommu {
    fn pause(&mut self) -> result::Result<(), MigratableError> {
        self.common.pause()
    }

    fn resume(&mut self) -> result::Result<(), MigratableError> {
        self.common.resume()
    }
}

impl Snapshottable for Iommu {
    fn id(&self) -> String {
        self.id.clone()
    }

    fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
        Snapshot::new_from_versioned_state(&self.id, &self.state())
    }

    fn restore(&mut self, snapshot: Snapshot) -> std::result::Result<(), MigratableError> {
        self.set_state(&snapshot.to_versioned_state(&self.id)?);
        Ok(())
    }
}

impl Transportable for Iommu {}
impl Migratable for Iommu {}
903